{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 5304, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.766478342749529e-08, "loss": 2.7958, "step": 1 }, { "epoch": 0.0, "learning_rate": 7.532956685499058e-08, "loss": 2.9543, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.1299435028248588e-07, "loss": 2.9552, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.5065913370998117e-07, "loss": 2.834, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.8832391713747647e-07, "loss": 2.9974, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.2598870056497177e-07, "loss": 2.9263, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.6365348399246707e-07, "loss": 2.9985, "step": 7 }, { "epoch": 0.0, "learning_rate": 3.0131826741996234e-07, "loss": 2.9096, "step": 8 }, { "epoch": 0.0, "learning_rate": 3.3898305084745766e-07, "loss": 2.882, "step": 9 }, { "epoch": 0.0, "learning_rate": 3.7664783427495294e-07, "loss": 3.1376, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.1431261770244826e-07, "loss": 2.9197, "step": 11 }, { "epoch": 0.0, "learning_rate": 4.5197740112994353e-07, "loss": 2.8967, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.896421845574388e-07, "loss": 2.9393, "step": 13 }, { "epoch": 0.01, "learning_rate": 5.273069679849341e-07, "loss": 2.9146, "step": 14 }, { "epoch": 0.01, "learning_rate": 5.649717514124295e-07, "loss": 2.9065, "step": 15 }, { "epoch": 0.01, "learning_rate": 6.026365348399247e-07, "loss": 2.8763, "step": 16 }, { "epoch": 0.01, "learning_rate": 6.4030131826742e-07, "loss": 2.8309, "step": 17 }, { "epoch": 0.01, "learning_rate": 6.779661016949153e-07, "loss": 2.8165, "step": 18 }, { "epoch": 0.01, "learning_rate": 7.156308851224105e-07, "loss": 2.8588, "step": 19 }, { "epoch": 0.01, "learning_rate": 7.532956685499059e-07, "loss": 2.8875, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.909604519774013e-07, "loss": 2.7888, "step": 21 }, { "epoch": 0.01, "learning_rate": 8.286252354048965e-07, "loss": 2.7988, "step": 22 }, { "epoch": 0.01, "learning_rate": 8.662900188323918e-07, "loss": 2.7878, "step": 23 }, { "epoch": 0.01, "learning_rate": 9.039548022598871e-07, "loss": 2.6322, "step": 24 }, { "epoch": 0.01, "learning_rate": 9.416195856873824e-07, "loss": 2.6957, "step": 25 }, { "epoch": 0.01, "learning_rate": 9.792843691148776e-07, "loss": 2.5878, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.016949152542373e-06, "loss": 2.4839, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.0546139359698683e-06, "loss": 2.5511, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.0922787193973636e-06, "loss": 2.4919, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.129943502824859e-06, "loss": 2.3885, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.167608286252354e-06, "loss": 2.3704, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.2052730696798494e-06, "loss": 2.3086, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.2429378531073449e-06, "loss": 2.2356, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.28060263653484e-06, "loss": 2.1841, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.3182674199623351e-06, "loss": 2.2746, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.3559322033898307e-06, "loss": 2.1866, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.393596986817326e-06, "loss": 2.1649, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.431261770244821e-06, "loss": 2.182, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.4689265536723166e-06, "loss": 2.1103, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.5065913370998117e-06, "loss": 2.0646, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.544256120527307e-06, "loss": 2.0584, "step": 41 }, { "epoch": 0.02, "learning_rate": 1.5819209039548026e-06, "loss": 1.9309, "step": 42 }, { "epoch": 0.02, "learning_rate": 1.6195856873822977e-06, "loss": 2.0101, "step": 43 }, { "epoch": 0.02, "learning_rate": 1.657250470809793e-06, "loss": 1.9791, "step": 44 }, { "epoch": 0.02, "learning_rate": 1.6949152542372882e-06, "loss": 1.8432, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.7325800376647837e-06, "loss": 1.8036, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.7702448210922788e-06, "loss": 1.7585, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.8079096045197741e-06, "loss": 1.7535, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.8455743879472697e-06, "loss": 1.7353, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.8832391713747648e-06, "loss": 1.7066, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.92090395480226e-06, "loss": 1.746, "step": 51 }, { "epoch": 0.02, "learning_rate": 1.9585687382297552e-06, "loss": 1.5842, "step": 52 }, { "epoch": 0.02, "learning_rate": 1.9962335216572505e-06, "loss": 1.7291, "step": 53 }, { "epoch": 0.02, "learning_rate": 2.033898305084746e-06, "loss": 1.6316, "step": 54 }, { "epoch": 0.02, "learning_rate": 2.071563088512241e-06, "loss": 1.6009, "step": 55 }, { "epoch": 0.02, "learning_rate": 2.1092278719397365e-06, "loss": 1.4927, "step": 56 }, { "epoch": 0.02, "learning_rate": 2.146892655367232e-06, "loss": 1.5311, "step": 57 }, { "epoch": 0.02, "learning_rate": 2.184557438794727e-06, "loss": 1.5646, "step": 58 }, { "epoch": 0.02, "learning_rate": 2.222222222222222e-06, "loss": 1.5793, "step": 59 }, { "epoch": 0.02, "learning_rate": 2.259887005649718e-06, "loss": 1.5498, "step": 60 }, { "epoch": 0.02, "learning_rate": 2.297551789077213e-06, "loss": 1.5575, "step": 61 }, { "epoch": 0.02, "learning_rate": 2.335216572504708e-06, "loss": 1.4041, "step": 62 }, { "epoch": 0.02, "learning_rate": 2.372881355932204e-06, "loss": 1.4083, "step": 63 }, { "epoch": 0.02, "learning_rate": 2.4105461393596987e-06, "loss": 1.4854, "step": 64 }, { "epoch": 0.02, "learning_rate": 2.448210922787194e-06, "loss": 1.5078, "step": 65 }, { "epoch": 0.02, "learning_rate": 2.4858757062146898e-06, "loss": 1.4387, "step": 66 }, { "epoch": 0.03, "learning_rate": 2.523540489642185e-06, "loss": 1.4786, "step": 67 }, { "epoch": 0.03, "learning_rate": 2.56120527306968e-06, "loss": 1.3294, "step": 68 }, { "epoch": 0.03, "learning_rate": 2.5988700564971753e-06, "loss": 1.3316, "step": 69 }, { "epoch": 0.03, "learning_rate": 2.6365348399246702e-06, "loss": 1.4648, "step": 70 }, { "epoch": 0.03, "learning_rate": 2.674199623352166e-06, "loss": 1.2828, "step": 71 }, { "epoch": 0.03, "learning_rate": 2.7118644067796613e-06, "loss": 1.3085, "step": 72 }, { "epoch": 0.03, "learning_rate": 2.749529190207156e-06, "loss": 1.2677, "step": 73 }, { "epoch": 0.03, "learning_rate": 2.787193973634652e-06, "loss": 1.3734, "step": 74 }, { "epoch": 0.03, "learning_rate": 2.8248587570621473e-06, "loss": 1.3819, "step": 75 }, { "epoch": 0.03, "learning_rate": 2.862523540489642e-06, "loss": 1.3106, "step": 76 }, { "epoch": 0.03, "learning_rate": 2.9001883239171375e-06, "loss": 1.2493, "step": 77 }, { "epoch": 0.03, "learning_rate": 2.9378531073446333e-06, "loss": 1.2681, "step": 78 }, { "epoch": 0.03, "learning_rate": 2.975517890772128e-06, "loss": 1.4092, "step": 79 }, { "epoch": 0.03, "learning_rate": 3.0131826741996235e-06, "loss": 1.2499, "step": 80 }, { "epoch": 0.03, "learning_rate": 3.0508474576271192e-06, "loss": 1.3015, "step": 81 }, { "epoch": 0.03, "learning_rate": 3.088512241054614e-06, "loss": 1.2523, "step": 82 }, { "epoch": 0.03, "learning_rate": 3.1261770244821095e-06, "loss": 1.4283, "step": 83 }, { "epoch": 0.03, "learning_rate": 3.163841807909605e-06, "loss": 1.299, "step": 84 }, { "epoch": 0.03, "learning_rate": 3.2015065913371e-06, "loss": 1.2608, "step": 85 }, { "epoch": 0.03, "learning_rate": 3.2391713747645954e-06, "loss": 1.1724, "step": 86 }, { "epoch": 0.03, "learning_rate": 3.2768361581920903e-06, "loss": 1.2671, "step": 87 }, { "epoch": 0.03, "learning_rate": 3.314500941619586e-06, "loss": 1.2867, "step": 88 }, { "epoch": 0.03, "learning_rate": 3.3521657250470814e-06, "loss": 1.1624, "step": 89 }, { "epoch": 0.03, "learning_rate": 3.3898305084745763e-06, "loss": 1.2359, "step": 90 }, { "epoch": 0.03, "learning_rate": 3.427495291902072e-06, "loss": 1.2357, "step": 91 }, { "epoch": 0.03, "learning_rate": 3.4651600753295674e-06, "loss": 1.2683, "step": 92 }, { "epoch": 0.04, "learning_rate": 3.5028248587570623e-06, "loss": 1.1811, "step": 93 }, { "epoch": 0.04, "learning_rate": 3.5404896421845576e-06, "loss": 1.2737, "step": 94 }, { "epoch": 0.04, "learning_rate": 3.5781544256120534e-06, "loss": 1.2361, "step": 95 }, { "epoch": 0.04, "learning_rate": 3.6158192090395483e-06, "loss": 1.2034, "step": 96 }, { "epoch": 0.04, "learning_rate": 3.6534839924670436e-06, "loss": 1.2082, "step": 97 }, { "epoch": 0.04, "learning_rate": 3.6911487758945393e-06, "loss": 1.1567, "step": 98 }, { "epoch": 0.04, "learning_rate": 3.7288135593220342e-06, "loss": 1.1574, "step": 99 }, { "epoch": 0.04, "learning_rate": 3.7664783427495296e-06, "loss": 1.1934, "step": 100 }, { "epoch": 0.04, "learning_rate": 3.8041431261770245e-06, "loss": 1.2106, "step": 101 }, { "epoch": 0.04, "learning_rate": 3.84180790960452e-06, "loss": 1.0902, "step": 102 }, { "epoch": 0.04, "learning_rate": 3.879472693032015e-06, "loss": 1.1199, "step": 103 }, { "epoch": 0.04, "learning_rate": 3.9171374764595104e-06, "loss": 1.119, "step": 104 }, { "epoch": 0.04, "learning_rate": 3.954802259887006e-06, "loss": 1.1105, "step": 105 }, { "epoch": 0.04, "learning_rate": 3.992467043314501e-06, "loss": 1.1195, "step": 106 }, { "epoch": 0.04, "learning_rate": 4.030131826741996e-06, "loss": 1.1678, "step": 107 }, { "epoch": 0.04, "learning_rate": 4.067796610169492e-06, "loss": 1.113, "step": 108 }, { "epoch": 0.04, "learning_rate": 4.105461393596987e-06, "loss": 1.1583, "step": 109 }, { "epoch": 0.04, "learning_rate": 4.143126177024482e-06, "loss": 1.0594, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.180790960451978e-06, "loss": 1.1242, "step": 111 }, { "epoch": 0.04, "learning_rate": 4.218455743879473e-06, "loss": 1.1345, "step": 112 }, { "epoch": 0.04, "learning_rate": 4.256120527306968e-06, "loss": 0.989, "step": 113 }, { "epoch": 0.04, "learning_rate": 4.293785310734464e-06, "loss": 1.0713, "step": 114 }, { "epoch": 0.04, "learning_rate": 4.331450094161959e-06, "loss": 1.1164, "step": 115 }, { "epoch": 0.04, "learning_rate": 4.369114877589454e-06, "loss": 1.0326, "step": 116 }, { "epoch": 0.04, "learning_rate": 4.40677966101695e-06, "loss": 1.1188, "step": 117 }, { "epoch": 0.04, "learning_rate": 4.444444444444444e-06, "loss": 1.012, "step": 118 }, { "epoch": 0.04, "learning_rate": 4.48210922787194e-06, "loss": 1.1291, "step": 119 }, { "epoch": 0.05, "learning_rate": 4.519774011299436e-06, "loss": 0.9849, "step": 120 }, { "epoch": 0.05, "learning_rate": 4.55743879472693e-06, "loss": 1.0921, "step": 121 }, { "epoch": 0.05, "learning_rate": 4.595103578154426e-06, "loss": 1.0153, "step": 122 }, { "epoch": 0.05, "learning_rate": 4.632768361581922e-06, "loss": 1.0806, "step": 123 }, { "epoch": 0.05, "learning_rate": 4.670433145009416e-06, "loss": 1.1182, "step": 124 }, { "epoch": 0.05, "learning_rate": 4.7080979284369114e-06, "loss": 1.0514, "step": 125 }, { "epoch": 0.05, "learning_rate": 4.745762711864408e-06, "loss": 1.0047, "step": 126 }, { "epoch": 0.05, "learning_rate": 4.783427495291902e-06, "loss": 1.0249, "step": 127 }, { "epoch": 0.05, "learning_rate": 4.821092278719397e-06, "loss": 1.0153, "step": 128 }, { "epoch": 0.05, "learning_rate": 4.8587570621468936e-06, "loss": 1.0094, "step": 129 }, { "epoch": 0.05, "learning_rate": 4.896421845574388e-06, "loss": 1.0071, "step": 130 }, { "epoch": 0.05, "learning_rate": 4.934086629001883e-06, "loss": 0.9576, "step": 131 }, { "epoch": 0.05, "learning_rate": 4.9717514124293796e-06, "loss": 0.9672, "step": 132 }, { "epoch": 0.05, "learning_rate": 5.009416195856874e-06, "loss": 1.0385, "step": 133 }, { "epoch": 0.05, "learning_rate": 5.04708097928437e-06, "loss": 1.0006, "step": 134 }, { "epoch": 0.05, "learning_rate": 5.084745762711865e-06, "loss": 0.978, "step": 135 }, { "epoch": 0.05, "learning_rate": 5.12241054613936e-06, "loss": 1.0089, "step": 136 }, { "epoch": 0.05, "learning_rate": 5.1600753295668545e-06, "loss": 0.9681, "step": 137 }, { "epoch": 0.05, "learning_rate": 5.197740112994351e-06, "loss": 0.9223, "step": 138 }, { "epoch": 0.05, "learning_rate": 5.235404896421846e-06, "loss": 0.9802, "step": 139 }, { "epoch": 0.05, "learning_rate": 5.2730696798493405e-06, "loss": 0.946, "step": 140 }, { "epoch": 0.05, "learning_rate": 5.310734463276837e-06, "loss": 0.947, "step": 141 }, { "epoch": 0.05, "learning_rate": 5.348399246704332e-06, "loss": 0.968, "step": 142 }, { "epoch": 0.05, "learning_rate": 5.3860640301318264e-06, "loss": 0.963, "step": 143 }, { "epoch": 0.05, "learning_rate": 5.423728813559323e-06, "loss": 1.0149, "step": 144 }, { "epoch": 0.05, "learning_rate": 5.461393596986818e-06, "loss": 0.9749, "step": 145 }, { "epoch": 0.06, "learning_rate": 5.499058380414312e-06, "loss": 0.9374, "step": 146 }, { "epoch": 0.06, "learning_rate": 5.536723163841809e-06, "loss": 0.9184, "step": 147 }, { "epoch": 0.06, "learning_rate": 5.574387947269304e-06, "loss": 0.9298, "step": 148 }, { "epoch": 0.06, "learning_rate": 5.612052730696798e-06, "loss": 0.9694, "step": 149 }, { "epoch": 0.06, "learning_rate": 5.6497175141242946e-06, "loss": 0.8965, "step": 150 }, { "epoch": 0.06, "learning_rate": 5.68738229755179e-06, "loss": 0.9559, "step": 151 }, { "epoch": 0.06, "learning_rate": 5.725047080979284e-06, "loss": 0.8978, "step": 152 }, { "epoch": 0.06, "learning_rate": 5.7627118644067805e-06, "loss": 0.9007, "step": 153 }, { "epoch": 0.06, "learning_rate": 5.800376647834275e-06, "loss": 0.9264, "step": 154 }, { "epoch": 0.06, "learning_rate": 5.83804143126177e-06, "loss": 0.8754, "step": 155 }, { "epoch": 0.06, "learning_rate": 5.8757062146892665e-06, "loss": 0.9041, "step": 156 }, { "epoch": 0.06, "learning_rate": 5.913370998116761e-06, "loss": 0.9266, "step": 157 }, { "epoch": 0.06, "learning_rate": 5.951035781544256e-06, "loss": 0.8811, "step": 158 }, { "epoch": 0.06, "learning_rate": 5.9887005649717525e-06, "loss": 0.9209, "step": 159 }, { "epoch": 0.06, "learning_rate": 6.026365348399247e-06, "loss": 0.9324, "step": 160 }, { "epoch": 0.06, "learning_rate": 6.064030131826742e-06, "loss": 0.8942, "step": 161 }, { "epoch": 0.06, "learning_rate": 6.1016949152542385e-06, "loss": 0.8793, "step": 162 }, { "epoch": 0.06, "learning_rate": 6.139359698681733e-06, "loss": 0.9139, "step": 163 }, { "epoch": 0.06, "learning_rate": 6.177024482109228e-06, "loss": 0.9058, "step": 164 }, { "epoch": 0.06, "learning_rate": 6.2146892655367244e-06, "loss": 0.9512, "step": 165 }, { "epoch": 0.06, "learning_rate": 6.252354048964219e-06, "loss": 0.8061, "step": 166 }, { "epoch": 0.06, "learning_rate": 6.290018832391714e-06, "loss": 0.9571, "step": 167 }, { "epoch": 0.06, "learning_rate": 6.32768361581921e-06, "loss": 0.8328, "step": 168 }, { "epoch": 0.06, "learning_rate": 6.365348399246705e-06, "loss": 0.8713, "step": 169 }, { "epoch": 0.06, "learning_rate": 6.4030131826742e-06, "loss": 0.952, "step": 170 }, { "epoch": 0.06, "learning_rate": 6.440677966101695e-06, "loss": 0.9658, "step": 171 }, { "epoch": 0.06, "learning_rate": 6.478342749529191e-06, "loss": 0.8768, "step": 172 }, { "epoch": 0.07, "learning_rate": 6.516007532956686e-06, "loss": 0.7777, "step": 173 }, { "epoch": 0.07, "learning_rate": 6.553672316384181e-06, "loss": 0.8408, "step": 174 }, { "epoch": 0.07, "learning_rate": 6.591337099811677e-06, "loss": 0.8953, "step": 175 }, { "epoch": 0.07, "learning_rate": 6.629001883239172e-06, "loss": 0.9684, "step": 176 }, { "epoch": 0.07, "learning_rate": 6.666666666666667e-06, "loss": 0.9214, "step": 177 }, { "epoch": 0.07, "learning_rate": 6.704331450094163e-06, "loss": 0.8246, "step": 178 }, { "epoch": 0.07, "learning_rate": 6.741996233521658e-06, "loss": 0.9018, "step": 179 }, { "epoch": 0.07, "learning_rate": 6.779661016949153e-06, "loss": 0.8474, "step": 180 }, { "epoch": 0.07, "learning_rate": 6.817325800376649e-06, "loss": 0.8242, "step": 181 }, { "epoch": 0.07, "learning_rate": 6.854990583804144e-06, "loss": 0.8341, "step": 182 }, { "epoch": 0.07, "learning_rate": 6.892655367231639e-06, "loss": 0.8592, "step": 183 }, { "epoch": 0.07, "learning_rate": 6.930320150659135e-06, "loss": 0.8749, "step": 184 }, { "epoch": 0.07, "learning_rate": 6.96798493408663e-06, "loss": 0.8052, "step": 185 }, { "epoch": 0.07, "learning_rate": 7.0056497175141246e-06, "loss": 0.7766, "step": 186 }, { "epoch": 0.07, "learning_rate": 7.043314500941621e-06, "loss": 0.8643, "step": 187 }, { "epoch": 0.07, "learning_rate": 7.080979284369115e-06, "loss": 0.7742, "step": 188 }, { "epoch": 0.07, "learning_rate": 7.1186440677966106e-06, "loss": 0.859, "step": 189 }, { "epoch": 0.07, "learning_rate": 7.156308851224107e-06, "loss": 0.8215, "step": 190 }, { "epoch": 0.07, "learning_rate": 7.193973634651601e-06, "loss": 0.8154, "step": 191 }, { "epoch": 0.07, "learning_rate": 7.2316384180790965e-06, "loss": 0.8496, "step": 192 }, { "epoch": 0.07, "learning_rate": 7.269303201506593e-06, "loss": 0.8587, "step": 193 }, { "epoch": 0.07, "learning_rate": 7.306967984934087e-06, "loss": 0.8836, "step": 194 }, { "epoch": 0.07, "learning_rate": 7.3446327683615825e-06, "loss": 0.76, "step": 195 }, { "epoch": 0.07, "learning_rate": 7.382297551789079e-06, "loss": 0.7746, "step": 196 }, { "epoch": 0.07, "learning_rate": 7.419962335216573e-06, "loss": 0.8502, "step": 197 }, { "epoch": 0.07, "learning_rate": 7.4576271186440685e-06, "loss": 0.8591, "step": 198 }, { "epoch": 0.08, "learning_rate": 7.495291902071564e-06, "loss": 0.8472, "step": 199 }, { "epoch": 0.08, "learning_rate": 7.532956685499059e-06, "loss": 0.8437, "step": 200 }, { "epoch": 0.08, "learning_rate": 7.5706214689265545e-06, "loss": 0.8408, "step": 201 }, { "epoch": 0.08, "learning_rate": 7.608286252354049e-06, "loss": 0.8462, "step": 202 }, { "epoch": 0.08, "learning_rate": 7.645951035781545e-06, "loss": 0.8009, "step": 203 }, { "epoch": 0.08, "learning_rate": 7.68361581920904e-06, "loss": 0.7837, "step": 204 }, { "epoch": 0.08, "learning_rate": 7.721280602636536e-06, "loss": 0.8586, "step": 205 }, { "epoch": 0.08, "learning_rate": 7.75894538606403e-06, "loss": 0.7854, "step": 206 }, { "epoch": 0.08, "learning_rate": 7.796610169491526e-06, "loss": 0.8275, "step": 207 }, { "epoch": 0.08, "learning_rate": 7.834274952919021e-06, "loss": 0.8777, "step": 208 }, { "epoch": 0.08, "learning_rate": 7.871939736346517e-06, "loss": 0.8624, "step": 209 }, { "epoch": 0.08, "learning_rate": 7.909604519774012e-06, "loss": 0.7894, "step": 210 }, { "epoch": 0.08, "learning_rate": 7.947269303201508e-06, "loss": 0.8056, "step": 211 }, { "epoch": 0.08, "learning_rate": 7.984934086629002e-06, "loss": 0.8312, "step": 212 }, { "epoch": 0.08, "learning_rate": 8.022598870056498e-06, "loss": 0.8066, "step": 213 }, { "epoch": 0.08, "learning_rate": 8.060263653483993e-06, "loss": 0.804, "step": 214 }, { "epoch": 0.08, "learning_rate": 8.097928436911489e-06, "loss": 0.7917, "step": 215 }, { "epoch": 0.08, "learning_rate": 8.135593220338983e-06, "loss": 0.8534, "step": 216 }, { "epoch": 0.08, "learning_rate": 8.17325800376648e-06, "loss": 0.7742, "step": 217 }, { "epoch": 0.08, "learning_rate": 8.210922787193974e-06, "loss": 0.8468, "step": 218 }, { "epoch": 0.08, "learning_rate": 8.248587570621469e-06, "loss": 0.837, "step": 219 }, { "epoch": 0.08, "learning_rate": 8.286252354048965e-06, "loss": 0.8983, "step": 220 }, { "epoch": 0.08, "learning_rate": 8.323917137476461e-06, "loss": 0.8001, "step": 221 }, { "epoch": 0.08, "learning_rate": 8.361581920903955e-06, "loss": 0.7461, "step": 222 }, { "epoch": 0.08, "learning_rate": 8.39924670433145e-06, "loss": 0.7429, "step": 223 }, { "epoch": 0.08, "learning_rate": 8.436911487758946e-06, "loss": 0.7835, "step": 224 }, { "epoch": 0.08, "learning_rate": 8.47457627118644e-06, "loss": 0.7939, "step": 225 }, { "epoch": 0.09, "learning_rate": 8.512241054613937e-06, "loss": 0.797, "step": 226 }, { "epoch": 0.09, "learning_rate": 8.549905838041433e-06, "loss": 0.7854, "step": 227 }, { "epoch": 0.09, "learning_rate": 8.587570621468927e-06, "loss": 0.7584, "step": 228 }, { "epoch": 0.09, "learning_rate": 8.625235404896422e-06, "loss": 0.8086, "step": 229 }, { "epoch": 0.09, "learning_rate": 8.662900188323918e-06, "loss": 0.7379, "step": 230 }, { "epoch": 0.09, "learning_rate": 8.700564971751413e-06, "loss": 0.743, "step": 231 }, { "epoch": 0.09, "learning_rate": 8.738229755178909e-06, "loss": 0.735, "step": 232 }, { "epoch": 0.09, "learning_rate": 8.775894538606405e-06, "loss": 0.8161, "step": 233 }, { "epoch": 0.09, "learning_rate": 8.8135593220339e-06, "loss": 0.7673, "step": 234 }, { "epoch": 0.09, "learning_rate": 8.851224105461394e-06, "loss": 0.8588, "step": 235 }, { "epoch": 0.09, "learning_rate": 8.888888888888888e-06, "loss": 0.8527, "step": 236 }, { "epoch": 0.09, "learning_rate": 8.926553672316384e-06, "loss": 0.73, "step": 237 }, { "epoch": 0.09, "learning_rate": 8.96421845574388e-06, "loss": 0.6783, "step": 238 }, { "epoch": 0.09, "learning_rate": 9.001883239171375e-06, "loss": 0.7665, "step": 239 }, { "epoch": 0.09, "learning_rate": 9.039548022598871e-06, "loss": 0.7746, "step": 240 }, { "epoch": 0.09, "learning_rate": 9.077212806026366e-06, "loss": 0.8216, "step": 241 }, { "epoch": 0.09, "learning_rate": 9.11487758945386e-06, "loss": 0.7907, "step": 242 }, { "epoch": 0.09, "learning_rate": 9.152542372881356e-06, "loss": 0.8099, "step": 243 }, { "epoch": 0.09, "learning_rate": 9.190207156308853e-06, "loss": 0.7314, "step": 244 }, { "epoch": 0.09, "learning_rate": 9.227871939736347e-06, "loss": 0.7974, "step": 245 }, { "epoch": 0.09, "learning_rate": 9.265536723163843e-06, "loss": 0.7284, "step": 246 }, { "epoch": 0.09, "learning_rate": 9.303201506591338e-06, "loss": 0.7891, "step": 247 }, { "epoch": 0.09, "learning_rate": 9.340866290018832e-06, "loss": 0.7664, "step": 248 }, { "epoch": 0.09, "learning_rate": 9.378531073446328e-06, "loss": 0.7003, "step": 249 }, { "epoch": 0.09, "learning_rate": 9.416195856873823e-06, "loss": 0.795, "step": 250 }, { "epoch": 0.09, "learning_rate": 9.453860640301319e-06, "loss": 0.7965, "step": 251 }, { "epoch": 0.1, "learning_rate": 9.491525423728815e-06, "loss": 0.8259, "step": 252 }, { "epoch": 0.1, "learning_rate": 9.52919020715631e-06, "loss": 0.791, "step": 253 }, { "epoch": 0.1, "learning_rate": 9.566854990583804e-06, "loss": 0.7828, "step": 254 }, { "epoch": 0.1, "learning_rate": 9.6045197740113e-06, "loss": 0.7607, "step": 255 }, { "epoch": 0.1, "learning_rate": 9.642184557438795e-06, "loss": 0.7618, "step": 256 }, { "epoch": 0.1, "learning_rate": 9.679849340866291e-06, "loss": 0.7659, "step": 257 }, { "epoch": 0.1, "learning_rate": 9.717514124293787e-06, "loss": 0.7707, "step": 258 }, { "epoch": 0.1, "learning_rate": 9.755178907721282e-06, "loss": 0.7416, "step": 259 }, { "epoch": 0.1, "learning_rate": 9.792843691148776e-06, "loss": 0.7529, "step": 260 }, { "epoch": 0.1, "learning_rate": 9.830508474576272e-06, "loss": 0.7384, "step": 261 }, { "epoch": 0.1, "learning_rate": 9.868173258003767e-06, "loss": 0.8687, "step": 262 }, { "epoch": 0.1, "learning_rate": 9.905838041431263e-06, "loss": 0.7384, "step": 263 }, { "epoch": 0.1, "learning_rate": 9.943502824858759e-06, "loss": 0.7522, "step": 264 }, { "epoch": 0.1, "learning_rate": 9.981167608286254e-06, "loss": 0.762, "step": 265 }, { "epoch": 0.1, "learning_rate": 1.0018832391713748e-05, "loss": 0.7432, "step": 266 }, { "epoch": 0.1, "learning_rate": 1.0056497175141244e-05, "loss": 0.7116, "step": 267 }, { "epoch": 0.1, "learning_rate": 1.009416195856874e-05, "loss": 0.7312, "step": 268 }, { "epoch": 0.1, "learning_rate": 1.0131826741996233e-05, "loss": 0.8168, "step": 269 }, { "epoch": 0.1, "learning_rate": 1.016949152542373e-05, "loss": 0.7433, "step": 270 }, { "epoch": 0.1, "learning_rate": 1.0207156308851226e-05, "loss": 0.723, "step": 271 }, { "epoch": 0.1, "learning_rate": 1.024482109227872e-05, "loss": 0.7029, "step": 272 }, { "epoch": 0.1, "learning_rate": 1.0282485875706216e-05, "loss": 0.723, "step": 273 }, { "epoch": 0.1, "learning_rate": 1.0320150659133709e-05, "loss": 0.7161, "step": 274 }, { "epoch": 0.1, "learning_rate": 1.0357815442561205e-05, "loss": 0.7008, "step": 275 }, { "epoch": 0.1, "learning_rate": 1.0395480225988701e-05, "loss": 0.7299, "step": 276 }, { "epoch": 0.1, "learning_rate": 1.0433145009416197e-05, "loss": 0.7063, "step": 277 }, { "epoch": 0.1, "learning_rate": 1.0470809792843692e-05, "loss": 0.752, "step": 278 }, { "epoch": 0.11, "learning_rate": 1.0508474576271188e-05, "loss": 0.6854, "step": 279 }, { "epoch": 0.11, "learning_rate": 1.0546139359698681e-05, "loss": 0.7335, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.0583804143126177e-05, "loss": 0.7774, "step": 281 }, { "epoch": 0.11, "learning_rate": 1.0621468926553673e-05, "loss": 0.7728, "step": 282 }, { "epoch": 0.11, "learning_rate": 1.065913370998117e-05, "loss": 0.8001, "step": 283 }, { "epoch": 0.11, "learning_rate": 1.0696798493408664e-05, "loss": 0.7686, "step": 284 }, { "epoch": 0.11, "learning_rate": 1.073446327683616e-05, "loss": 0.7703, "step": 285 }, { "epoch": 0.11, "learning_rate": 1.0772128060263653e-05, "loss": 0.7748, "step": 286 }, { "epoch": 0.11, "learning_rate": 1.0809792843691149e-05, "loss": 0.7788, "step": 287 }, { "epoch": 0.11, "learning_rate": 1.0847457627118645e-05, "loss": 0.8058, "step": 288 }, { "epoch": 0.11, "learning_rate": 1.0885122410546141e-05, "loss": 0.7305, "step": 289 }, { "epoch": 0.11, "learning_rate": 1.0922787193973636e-05, "loss": 0.7284, "step": 290 }, { "epoch": 0.11, "learning_rate": 1.096045197740113e-05, "loss": 0.7559, "step": 291 }, { "epoch": 0.11, "learning_rate": 1.0998116760828625e-05, "loss": 0.7302, "step": 292 }, { "epoch": 0.11, "learning_rate": 1.1035781544256121e-05, "loss": 0.7293, "step": 293 }, { "epoch": 0.11, "learning_rate": 1.1073446327683617e-05, "loss": 0.7226, "step": 294 }, { "epoch": 0.11, "learning_rate": 1.1111111111111113e-05, "loss": 0.7703, "step": 295 }, { "epoch": 0.11, "learning_rate": 1.1148775894538608e-05, "loss": 0.7186, "step": 296 }, { "epoch": 0.11, "learning_rate": 1.1186440677966102e-05, "loss": 0.713, "step": 297 }, { "epoch": 0.11, "learning_rate": 1.1224105461393597e-05, "loss": 0.7414, "step": 298 }, { "epoch": 0.11, "learning_rate": 1.1261770244821093e-05, "loss": 0.7845, "step": 299 }, { "epoch": 0.11, "learning_rate": 1.1299435028248589e-05, "loss": 0.7245, "step": 300 }, { "epoch": 0.11, "learning_rate": 1.1337099811676084e-05, "loss": 0.7402, "step": 301 }, { "epoch": 0.11, "learning_rate": 1.137476459510358e-05, "loss": 0.7206, "step": 302 }, { "epoch": 0.11, "learning_rate": 1.1412429378531074e-05, "loss": 0.8149, "step": 303 }, { "epoch": 0.11, "learning_rate": 1.1450094161958569e-05, "loss": 0.7169, "step": 304 }, { "epoch": 0.12, "learning_rate": 1.1487758945386065e-05, "loss": 0.7212, "step": 305 }, { "epoch": 0.12, "learning_rate": 1.1525423728813561e-05, "loss": 0.7477, "step": 306 }, { "epoch": 0.12, "learning_rate": 1.1563088512241056e-05, "loss": 0.7458, "step": 307 }, { "epoch": 0.12, "learning_rate": 1.160075329566855e-05, "loss": 0.7411, "step": 308 }, { "epoch": 0.12, "learning_rate": 1.1638418079096046e-05, "loss": 0.8036, "step": 309 }, { "epoch": 0.12, "learning_rate": 1.167608286252354e-05, "loss": 0.7883, "step": 310 }, { "epoch": 0.12, "learning_rate": 1.1713747645951037e-05, "loss": 0.6819, "step": 311 }, { "epoch": 0.12, "learning_rate": 1.1751412429378533e-05, "loss": 0.7093, "step": 312 }, { "epoch": 0.12, "learning_rate": 1.1789077212806027e-05, "loss": 0.7291, "step": 313 }, { "epoch": 0.12, "learning_rate": 1.1826741996233522e-05, "loss": 0.7524, "step": 314 }, { "epoch": 0.12, "learning_rate": 1.1864406779661018e-05, "loss": 0.7628, "step": 315 }, { "epoch": 0.12, "learning_rate": 1.1902071563088513e-05, "loss": 0.7376, "step": 316 }, { "epoch": 0.12, "learning_rate": 1.1939736346516009e-05, "loss": 0.7767, "step": 317 }, { "epoch": 0.12, "learning_rate": 1.1977401129943505e-05, "loss": 0.7589, "step": 318 }, { "epoch": 0.12, "learning_rate": 1.2015065913371e-05, "loss": 0.7414, "step": 319 }, { "epoch": 0.12, "learning_rate": 1.2052730696798494e-05, "loss": 0.7842, "step": 320 }, { "epoch": 0.12, "learning_rate": 1.209039548022599e-05, "loss": 0.7349, "step": 321 }, { "epoch": 0.12, "learning_rate": 1.2128060263653485e-05, "loss": 0.7215, "step": 322 }, { "epoch": 0.12, "learning_rate": 1.216572504708098e-05, "loss": 0.754, "step": 323 }, { "epoch": 0.12, "learning_rate": 1.2203389830508477e-05, "loss": 0.6921, "step": 324 }, { "epoch": 0.12, "learning_rate": 1.224105461393597e-05, "loss": 0.7369, "step": 325 }, { "epoch": 0.12, "learning_rate": 1.2278719397363466e-05, "loss": 0.7503, "step": 326 }, { "epoch": 0.12, "learning_rate": 1.2316384180790962e-05, "loss": 0.7398, "step": 327 }, { "epoch": 0.12, "learning_rate": 1.2354048964218457e-05, "loss": 0.6805, "step": 328 }, { "epoch": 0.12, "learning_rate": 1.2391713747645953e-05, "loss": 0.7261, "step": 329 }, { "epoch": 0.12, "learning_rate": 1.2429378531073449e-05, "loss": 0.7273, "step": 330 }, { "epoch": 0.12, "learning_rate": 1.2467043314500942e-05, "loss": 0.7212, "step": 331 }, { "epoch": 0.13, "learning_rate": 1.2504708097928438e-05, "loss": 0.7146, "step": 332 }, { "epoch": 0.13, "learning_rate": 1.2542372881355932e-05, "loss": 0.7155, "step": 333 }, { "epoch": 0.13, "learning_rate": 1.2580037664783428e-05, "loss": 0.728, "step": 334 }, { "epoch": 0.13, "learning_rate": 1.2617702448210925e-05, "loss": 0.7132, "step": 335 }, { "epoch": 0.13, "learning_rate": 1.265536723163842e-05, "loss": 0.7534, "step": 336 }, { "epoch": 0.13, "learning_rate": 1.2693032015065914e-05, "loss": 0.6521, "step": 337 }, { "epoch": 0.13, "learning_rate": 1.273069679849341e-05, "loss": 0.7023, "step": 338 }, { "epoch": 0.13, "learning_rate": 1.2768361581920904e-05, "loss": 0.7363, "step": 339 }, { "epoch": 0.13, "learning_rate": 1.28060263653484e-05, "loss": 0.7118, "step": 340 }, { "epoch": 0.13, "learning_rate": 1.2843691148775897e-05, "loss": 0.7064, "step": 341 }, { "epoch": 0.13, "learning_rate": 1.288135593220339e-05, "loss": 0.6741, "step": 342 }, { "epoch": 0.13, "learning_rate": 1.2919020715630886e-05, "loss": 0.717, "step": 343 }, { "epoch": 0.13, "learning_rate": 1.2956685499058382e-05, "loss": 0.6158, "step": 344 }, { "epoch": 0.13, "learning_rate": 1.2994350282485876e-05, "loss": 0.6239, "step": 345 }, { "epoch": 0.13, "learning_rate": 1.3032015065913372e-05, "loss": 0.6911, "step": 346 }, { "epoch": 0.13, "learning_rate": 1.3069679849340869e-05, "loss": 0.7076, "step": 347 }, { "epoch": 0.13, "learning_rate": 1.3107344632768361e-05, "loss": 0.6818, "step": 348 }, { "epoch": 0.13, "learning_rate": 1.3145009416195858e-05, "loss": 0.7071, "step": 349 }, { "epoch": 0.13, "learning_rate": 1.3182674199623354e-05, "loss": 0.7004, "step": 350 }, { "epoch": 0.13, "learning_rate": 1.3220338983050848e-05, "loss": 0.7206, "step": 351 }, { "epoch": 0.13, "learning_rate": 1.3258003766478344e-05, "loss": 0.7443, "step": 352 }, { "epoch": 0.13, "learning_rate": 1.329566854990584e-05, "loss": 0.7391, "step": 353 }, { "epoch": 0.13, "learning_rate": 1.3333333333333333e-05, "loss": 0.7109, "step": 354 }, { "epoch": 0.13, "learning_rate": 1.337099811676083e-05, "loss": 0.7377, "step": 355 }, { "epoch": 0.13, "learning_rate": 1.3408662900188326e-05, "loss": 0.7183, "step": 356 }, { "epoch": 0.13, "learning_rate": 1.344632768361582e-05, "loss": 0.7498, "step": 357 }, { "epoch": 0.13, "learning_rate": 1.3483992467043316e-05, "loss": 0.6649, "step": 358 }, { "epoch": 0.14, "learning_rate": 1.3521657250470809e-05, "loss": 0.7041, "step": 359 }, { "epoch": 0.14, "learning_rate": 1.3559322033898305e-05, "loss": 0.6957, "step": 360 }, { "epoch": 0.14, "learning_rate": 1.3596986817325801e-05, "loss": 0.713, "step": 361 }, { "epoch": 0.14, "learning_rate": 1.3634651600753298e-05, "loss": 0.7142, "step": 362 }, { "epoch": 0.14, "learning_rate": 1.3672316384180792e-05, "loss": 0.6472, "step": 363 }, { "epoch": 0.14, "learning_rate": 1.3709981167608288e-05, "loss": 0.7288, "step": 364 }, { "epoch": 0.14, "learning_rate": 1.3747645951035781e-05, "loss": 0.6957, "step": 365 }, { "epoch": 0.14, "learning_rate": 1.3785310734463277e-05, "loss": 0.6815, "step": 366 }, { "epoch": 0.14, "learning_rate": 1.3822975517890773e-05, "loss": 0.726, "step": 367 }, { "epoch": 0.14, "learning_rate": 1.386064030131827e-05, "loss": 0.659, "step": 368 }, { "epoch": 0.14, "learning_rate": 1.3898305084745764e-05, "loss": 0.6921, "step": 369 }, { "epoch": 0.14, "learning_rate": 1.393596986817326e-05, "loss": 0.755, "step": 370 }, { "epoch": 0.14, "learning_rate": 1.3973634651600753e-05, "loss": 0.6798, "step": 371 }, { "epoch": 0.14, "learning_rate": 1.4011299435028249e-05, "loss": 0.705, "step": 372 }, { "epoch": 0.14, "learning_rate": 1.4048964218455745e-05, "loss": 0.7204, "step": 373 }, { "epoch": 0.14, "learning_rate": 1.4086629001883241e-05, "loss": 0.7052, "step": 374 }, { "epoch": 0.14, "learning_rate": 1.4124293785310736e-05, "loss": 0.6787, "step": 375 }, { "epoch": 0.14, "learning_rate": 1.416195856873823e-05, "loss": 0.6975, "step": 376 }, { "epoch": 0.14, "learning_rate": 1.4199623352165725e-05, "loss": 0.7072, "step": 377 }, { "epoch": 0.14, "learning_rate": 1.4237288135593221e-05, "loss": 0.644, "step": 378 }, { "epoch": 0.14, "learning_rate": 1.4274952919020717e-05, "loss": 0.7194, "step": 379 }, { "epoch": 0.14, "learning_rate": 1.4312617702448213e-05, "loss": 0.7043, "step": 380 }, { "epoch": 0.14, "learning_rate": 1.4350282485875708e-05, "loss": 0.686, "step": 381 }, { "epoch": 0.14, "learning_rate": 1.4387947269303202e-05, "loss": 0.669, "step": 382 }, { "epoch": 0.14, "learning_rate": 1.4425612052730697e-05, "loss": 0.668, "step": 383 }, { "epoch": 0.14, "learning_rate": 1.4463276836158193e-05, "loss": 0.7055, "step": 384 }, { "epoch": 0.15, "learning_rate": 1.450094161958569e-05, "loss": 0.6627, "step": 385 }, { "epoch": 0.15, "learning_rate": 1.4538606403013185e-05, "loss": 0.6871, "step": 386 }, { "epoch": 0.15, "learning_rate": 1.4576271186440678e-05, "loss": 0.7376, "step": 387 }, { "epoch": 0.15, "learning_rate": 1.4613935969868174e-05, "loss": 0.7334, "step": 388 }, { "epoch": 0.15, "learning_rate": 1.4651600753295669e-05, "loss": 0.7057, "step": 389 }, { "epoch": 0.15, "learning_rate": 1.4689265536723165e-05, "loss": 0.6909, "step": 390 }, { "epoch": 0.15, "learning_rate": 1.4726930320150661e-05, "loss": 0.593, "step": 391 }, { "epoch": 0.15, "learning_rate": 1.4764595103578157e-05, "loss": 0.6845, "step": 392 }, { "epoch": 0.15, "learning_rate": 1.480225988700565e-05, "loss": 0.6934, "step": 393 }, { "epoch": 0.15, "learning_rate": 1.4839924670433146e-05, "loss": 0.7325, "step": 394 }, { "epoch": 0.15, "learning_rate": 1.487758945386064e-05, "loss": 0.6984, "step": 395 }, { "epoch": 0.15, "learning_rate": 1.4915254237288137e-05, "loss": 0.7301, "step": 396 }, { "epoch": 0.15, "learning_rate": 1.4952919020715633e-05, "loss": 0.6933, "step": 397 }, { "epoch": 0.15, "learning_rate": 1.4990583804143128e-05, "loss": 0.6787, "step": 398 }, { "epoch": 0.15, "learning_rate": 1.5028248587570622e-05, "loss": 0.6945, "step": 399 }, { "epoch": 0.15, "learning_rate": 1.5065913370998118e-05, "loss": 0.6601, "step": 400 }, { "epoch": 0.15, "learning_rate": 1.5103578154425613e-05, "loss": 0.7367, "step": 401 }, { "epoch": 0.15, "learning_rate": 1.5141242937853109e-05, "loss": 0.7278, "step": 402 }, { "epoch": 0.15, "learning_rate": 1.5178907721280605e-05, "loss": 0.7222, "step": 403 }, { "epoch": 0.15, "learning_rate": 1.5216572504708098e-05, "loss": 0.7051, "step": 404 }, { "epoch": 0.15, "learning_rate": 1.5254237288135594e-05, "loss": 0.6374, "step": 405 }, { "epoch": 0.15, "learning_rate": 1.529190207156309e-05, "loss": 0.6737, "step": 406 }, { "epoch": 0.15, "learning_rate": 1.5329566854990585e-05, "loss": 0.6686, "step": 407 }, { "epoch": 0.15, "learning_rate": 1.536723163841808e-05, "loss": 0.6748, "step": 408 }, { "epoch": 0.15, "learning_rate": 1.5404896421845577e-05, "loss": 0.7579, "step": 409 }, { "epoch": 0.15, "learning_rate": 1.544256120527307e-05, "loss": 0.7093, "step": 410 }, { "epoch": 0.15, "learning_rate": 1.5480225988700566e-05, "loss": 0.6905, "step": 411 }, { "epoch": 0.16, "learning_rate": 1.551789077212806e-05, "loss": 0.7008, "step": 412 }, { "epoch": 0.16, "learning_rate": 1.555555555555556e-05, "loss": 0.6537, "step": 413 }, { "epoch": 0.16, "learning_rate": 1.5593220338983053e-05, "loss": 0.7201, "step": 414 }, { "epoch": 0.16, "learning_rate": 1.5630885122410547e-05, "loss": 0.6805, "step": 415 }, { "epoch": 0.16, "learning_rate": 1.5668549905838042e-05, "loss": 0.6577, "step": 416 }, { "epoch": 0.16, "learning_rate": 1.5706214689265536e-05, "loss": 0.7168, "step": 417 }, { "epoch": 0.16, "learning_rate": 1.5743879472693034e-05, "loss": 0.6914, "step": 418 }, { "epoch": 0.16, "learning_rate": 1.578154425612053e-05, "loss": 0.6905, "step": 419 }, { "epoch": 0.16, "learning_rate": 1.5819209039548023e-05, "loss": 0.7013, "step": 420 }, { "epoch": 0.16, "learning_rate": 1.5856873822975518e-05, "loss": 0.7723, "step": 421 }, { "epoch": 0.16, "learning_rate": 1.5894538606403015e-05, "loss": 0.7173, "step": 422 }, { "epoch": 0.16, "learning_rate": 1.593220338983051e-05, "loss": 0.6748, "step": 423 }, { "epoch": 0.16, "learning_rate": 1.5969868173258004e-05, "loss": 0.7316, "step": 424 }, { "epoch": 0.16, "learning_rate": 1.6007532956685502e-05, "loss": 0.6714, "step": 425 }, { "epoch": 0.16, "learning_rate": 1.6045197740112997e-05, "loss": 0.6712, "step": 426 }, { "epoch": 0.16, "learning_rate": 1.608286252354049e-05, "loss": 0.6901, "step": 427 }, { "epoch": 0.16, "learning_rate": 1.6120527306967986e-05, "loss": 0.7068, "step": 428 }, { "epoch": 0.16, "learning_rate": 1.615819209039548e-05, "loss": 0.7278, "step": 429 }, { "epoch": 0.16, "learning_rate": 1.6195856873822978e-05, "loss": 0.633, "step": 430 }, { "epoch": 0.16, "learning_rate": 1.6233521657250472e-05, "loss": 0.7075, "step": 431 }, { "epoch": 0.16, "learning_rate": 1.6271186440677967e-05, "loss": 0.7732, "step": 432 }, { "epoch": 0.16, "learning_rate": 1.630885122410546e-05, "loss": 0.7039, "step": 433 }, { "epoch": 0.16, "learning_rate": 1.634651600753296e-05, "loss": 0.707, "step": 434 }, { "epoch": 0.16, "learning_rate": 1.6384180790960454e-05, "loss": 0.7149, "step": 435 }, { "epoch": 0.16, "learning_rate": 1.6421845574387948e-05, "loss": 0.7508, "step": 436 }, { "epoch": 0.16, "learning_rate": 1.6459510357815446e-05, "loss": 0.6717, "step": 437 }, { "epoch": 0.17, "learning_rate": 1.6497175141242937e-05, "loss": 0.6676, "step": 438 }, { "epoch": 0.17, "learning_rate": 1.6534839924670435e-05, "loss": 0.7244, "step": 439 }, { "epoch": 0.17, "learning_rate": 1.657250470809793e-05, "loss": 0.6877, "step": 440 }, { "epoch": 0.17, "learning_rate": 1.6610169491525424e-05, "loss": 0.679, "step": 441 }, { "epoch": 0.17, "learning_rate": 1.6647834274952922e-05, "loss": 0.7215, "step": 442 }, { "epoch": 0.17, "learning_rate": 1.6685499058380416e-05, "loss": 0.7012, "step": 443 }, { "epoch": 0.17, "learning_rate": 1.672316384180791e-05, "loss": 0.6849, "step": 444 }, { "epoch": 0.17, "learning_rate": 1.6760828625235405e-05, "loss": 0.6312, "step": 445 }, { "epoch": 0.17, "learning_rate": 1.67984934086629e-05, "loss": 0.6834, "step": 446 }, { "epoch": 0.17, "learning_rate": 1.6836158192090398e-05, "loss": 0.6612, "step": 447 }, { "epoch": 0.17, "learning_rate": 1.6873822975517892e-05, "loss": 0.7506, "step": 448 }, { "epoch": 0.17, "learning_rate": 1.691148775894539e-05, "loss": 0.7152, "step": 449 }, { "epoch": 0.17, "learning_rate": 1.694915254237288e-05, "loss": 0.7107, "step": 450 }, { "epoch": 0.17, "learning_rate": 1.698681732580038e-05, "loss": 0.7075, "step": 451 }, { "epoch": 0.17, "learning_rate": 1.7024482109227873e-05, "loss": 0.6632, "step": 452 }, { "epoch": 0.17, "learning_rate": 1.7062146892655368e-05, "loss": 0.6345, "step": 453 }, { "epoch": 0.17, "learning_rate": 1.7099811676082866e-05, "loss": 0.7115, "step": 454 }, { "epoch": 0.17, "learning_rate": 1.7137476459510357e-05, "loss": 0.6854, "step": 455 }, { "epoch": 0.17, "learning_rate": 1.7175141242937855e-05, "loss": 0.6974, "step": 456 }, { "epoch": 0.17, "learning_rate": 1.721280602636535e-05, "loss": 0.7026, "step": 457 }, { "epoch": 0.17, "learning_rate": 1.7250470809792844e-05, "loss": 0.684, "step": 458 }, { "epoch": 0.17, "learning_rate": 1.728813559322034e-05, "loss": 0.7072, "step": 459 }, { "epoch": 0.17, "learning_rate": 1.7325800376647836e-05, "loss": 0.6771, "step": 460 }, { "epoch": 0.17, "learning_rate": 1.736346516007533e-05, "loss": 0.696, "step": 461 }, { "epoch": 0.17, "learning_rate": 1.7401129943502825e-05, "loss": 0.6647, "step": 462 }, { "epoch": 0.17, "learning_rate": 1.7438794726930323e-05, "loss": 0.6781, "step": 463 }, { "epoch": 0.17, "learning_rate": 1.7476459510357817e-05, "loss": 0.6876, "step": 464 }, { "epoch": 0.18, "learning_rate": 1.7514124293785312e-05, "loss": 0.6586, "step": 465 }, { "epoch": 0.18, "learning_rate": 1.755178907721281e-05, "loss": 0.6756, "step": 466 }, { "epoch": 0.18, "learning_rate": 1.75894538606403e-05, "loss": 0.6939, "step": 467 }, { "epoch": 0.18, "learning_rate": 1.76271186440678e-05, "loss": 0.7085, "step": 468 }, { "epoch": 0.18, "learning_rate": 1.7664783427495293e-05, "loss": 0.6741, "step": 469 }, { "epoch": 0.18, "learning_rate": 1.7702448210922788e-05, "loss": 0.6728, "step": 470 }, { "epoch": 0.18, "learning_rate": 1.7740112994350286e-05, "loss": 0.6972, "step": 471 }, { "epoch": 0.18, "learning_rate": 1.7777777777777777e-05, "loss": 0.6792, "step": 472 }, { "epoch": 0.18, "learning_rate": 1.7815442561205274e-05, "loss": 0.6608, "step": 473 }, { "epoch": 0.18, "learning_rate": 1.785310734463277e-05, "loss": 0.6253, "step": 474 }, { "epoch": 0.18, "learning_rate": 1.7890772128060267e-05, "loss": 0.7334, "step": 475 }, { "epoch": 0.18, "learning_rate": 1.792843691148776e-05, "loss": 0.6993, "step": 476 }, { "epoch": 0.18, "learning_rate": 1.7966101694915256e-05, "loss": 0.6569, "step": 477 }, { "epoch": 0.18, "learning_rate": 1.800376647834275e-05, "loss": 0.6992, "step": 478 }, { "epoch": 0.18, "learning_rate": 1.8041431261770245e-05, "loss": 0.6469, "step": 479 }, { "epoch": 0.18, "learning_rate": 1.8079096045197743e-05, "loss": 0.7599, "step": 480 }, { "epoch": 0.18, "learning_rate": 1.8116760828625237e-05, "loss": 0.6133, "step": 481 }, { "epoch": 0.18, "learning_rate": 1.815442561205273e-05, "loss": 0.6338, "step": 482 }, { "epoch": 0.18, "learning_rate": 1.8192090395480226e-05, "loss": 0.6677, "step": 483 }, { "epoch": 0.18, "learning_rate": 1.822975517890772e-05, "loss": 0.6818, "step": 484 }, { "epoch": 0.18, "learning_rate": 1.826741996233522e-05, "loss": 0.6848, "step": 485 }, { "epoch": 0.18, "learning_rate": 1.8305084745762713e-05, "loss": 0.6539, "step": 486 }, { "epoch": 0.18, "learning_rate": 1.834274952919021e-05, "loss": 0.7039, "step": 487 }, { "epoch": 0.18, "learning_rate": 1.8380414312617705e-05, "loss": 0.6736, "step": 488 }, { "epoch": 0.18, "learning_rate": 1.84180790960452e-05, "loss": 0.6956, "step": 489 }, { "epoch": 0.18, "learning_rate": 1.8455743879472694e-05, "loss": 0.6882, "step": 490 }, { "epoch": 0.19, "learning_rate": 1.849340866290019e-05, "loss": 0.6171, "step": 491 }, { "epoch": 0.19, "learning_rate": 1.8531073446327686e-05, "loss": 0.6568, "step": 492 }, { "epoch": 0.19, "learning_rate": 1.856873822975518e-05, "loss": 0.6585, "step": 493 }, { "epoch": 0.19, "learning_rate": 1.8606403013182675e-05, "loss": 0.6747, "step": 494 }, { "epoch": 0.19, "learning_rate": 1.864406779661017e-05, "loss": 0.7315, "step": 495 }, { "epoch": 0.19, "learning_rate": 1.8681732580037664e-05, "loss": 0.709, "step": 496 }, { "epoch": 0.19, "learning_rate": 1.8719397363465162e-05, "loss": 0.6387, "step": 497 }, { "epoch": 0.19, "learning_rate": 1.8757062146892657e-05, "loss": 0.7209, "step": 498 }, { "epoch": 0.19, "learning_rate": 1.8794726930320155e-05, "loss": 0.6467, "step": 499 }, { "epoch": 0.19, "learning_rate": 1.8832391713747646e-05, "loss": 0.6637, "step": 500 }, { "epoch": 0.19, "learning_rate": 1.8870056497175144e-05, "loss": 0.6496, "step": 501 }, { "epoch": 0.19, "learning_rate": 1.8907721280602638e-05, "loss": 0.6412, "step": 502 }, { "epoch": 0.19, "learning_rate": 1.8945386064030133e-05, "loss": 0.648, "step": 503 }, { "epoch": 0.19, "learning_rate": 1.898305084745763e-05, "loss": 0.675, "step": 504 }, { "epoch": 0.19, "learning_rate": 1.9020715630885125e-05, "loss": 0.7099, "step": 505 }, { "epoch": 0.19, "learning_rate": 1.905838041431262e-05, "loss": 0.6794, "step": 506 }, { "epoch": 0.19, "learning_rate": 1.9096045197740114e-05, "loss": 0.7039, "step": 507 }, { "epoch": 0.19, "learning_rate": 1.913370998116761e-05, "loss": 0.6178, "step": 508 }, { "epoch": 0.19, "learning_rate": 1.9171374764595106e-05, "loss": 0.6874, "step": 509 }, { "epoch": 0.19, "learning_rate": 1.92090395480226e-05, "loss": 0.7006, "step": 510 }, { "epoch": 0.19, "learning_rate": 1.9246704331450095e-05, "loss": 0.6504, "step": 511 }, { "epoch": 0.19, "learning_rate": 1.928436911487759e-05, "loss": 0.6942, "step": 512 }, { "epoch": 0.19, "learning_rate": 1.9322033898305087e-05, "loss": 0.6667, "step": 513 }, { "epoch": 0.19, "learning_rate": 1.9359698681732582e-05, "loss": 0.6815, "step": 514 }, { "epoch": 0.19, "learning_rate": 1.9397363465160076e-05, "loss": 0.6388, "step": 515 }, { "epoch": 0.19, "learning_rate": 1.9435028248587574e-05, "loss": 0.6721, "step": 516 }, { "epoch": 0.19, "learning_rate": 1.9472693032015065e-05, "loss": 0.6602, "step": 517 }, { "epoch": 0.2, "learning_rate": 1.9510357815442563e-05, "loss": 0.6927, "step": 518 }, { "epoch": 0.2, "learning_rate": 1.9548022598870058e-05, "loss": 0.6877, "step": 519 }, { "epoch": 0.2, "learning_rate": 1.9585687382297552e-05, "loss": 0.6733, "step": 520 }, { "epoch": 0.2, "learning_rate": 1.962335216572505e-05, "loss": 0.6894, "step": 521 }, { "epoch": 0.2, "learning_rate": 1.9661016949152545e-05, "loss": 0.6924, "step": 522 }, { "epoch": 0.2, "learning_rate": 1.969868173258004e-05, "loss": 0.6005, "step": 523 }, { "epoch": 0.2, "learning_rate": 1.9736346516007534e-05, "loss": 0.6611, "step": 524 }, { "epoch": 0.2, "learning_rate": 1.977401129943503e-05, "loss": 0.6791, "step": 525 }, { "epoch": 0.2, "learning_rate": 1.9811676082862526e-05, "loss": 0.6409, "step": 526 }, { "epoch": 0.2, "learning_rate": 1.984934086629002e-05, "loss": 0.7135, "step": 527 }, { "epoch": 0.2, "learning_rate": 1.9887005649717518e-05, "loss": 0.5862, "step": 528 }, { "epoch": 0.2, "learning_rate": 1.992467043314501e-05, "loss": 0.662, "step": 529 }, { "epoch": 0.2, "learning_rate": 1.9962335216572507e-05, "loss": 0.6571, "step": 530 }, { "epoch": 0.2, "learning_rate": 2e-05, "loss": 0.7191, "step": 531 }, { "epoch": 0.2, "learning_rate": 1.9999997833858292e-05, "loss": 0.6467, "step": 532 }, { "epoch": 0.2, "learning_rate": 1.9999991335434094e-05, "loss": 0.7477, "step": 533 }, { "epoch": 0.2, "learning_rate": 1.9999980504730227e-05, "loss": 0.6774, "step": 534 }, { "epoch": 0.2, "learning_rate": 1.9999965341751383e-05, "loss": 0.6213, "step": 535 }, { "epoch": 0.2, "learning_rate": 1.999994584650413e-05, "loss": 0.6872, "step": 536 }, { "epoch": 0.2, "learning_rate": 1.9999922018996914e-05, "loss": 0.7001, "step": 537 }, { "epoch": 0.2, "learning_rate": 1.999989385924006e-05, "loss": 0.6639, "step": 538 }, { "epoch": 0.2, "learning_rate": 1.9999861367245766e-05, "loss": 0.6243, "step": 539 }, { "epoch": 0.2, "learning_rate": 1.9999824543028106e-05, "loss": 0.6941, "step": 540 }, { "epoch": 0.2, "learning_rate": 1.9999783386603037e-05, "loss": 0.6245, "step": 541 }, { "epoch": 0.2, "learning_rate": 1.9999737897988388e-05, "loss": 0.7055, "step": 542 }, { "epoch": 0.2, "learning_rate": 1.9999688077203863e-05, "loss": 0.6232, "step": 543 }, { "epoch": 0.21, "learning_rate": 1.999963392427105e-05, "loss": 0.6864, "step": 544 }, { "epoch": 0.21, "learning_rate": 1.999957543921341e-05, "loss": 0.5833, "step": 545 }, { "epoch": 0.21, "learning_rate": 1.9999512622056273e-05, "loss": 0.6276, "step": 546 }, { "epoch": 0.21, "learning_rate": 1.9999445472826863e-05, "loss": 0.6553, "step": 547 }, { "epoch": 0.21, "learning_rate": 1.9999373991554264e-05, "loss": 0.5761, "step": 548 }, { "epoch": 0.21, "learning_rate": 1.9999298178269445e-05, "loss": 0.5979, "step": 549 }, { "epoch": 0.21, "learning_rate": 1.9999218033005255e-05, "loss": 0.6395, "step": 550 }, { "epoch": 0.21, "learning_rate": 1.999913355579641e-05, "loss": 0.6794, "step": 551 }, { "epoch": 0.21, "learning_rate": 1.9999044746679512e-05, "loss": 0.6419, "step": 552 }, { "epoch": 0.21, "learning_rate": 1.999895160569303e-05, "loss": 0.6281, "step": 553 }, { "epoch": 0.21, "learning_rate": 1.999885413287732e-05, "loss": 0.6523, "step": 554 }, { "epoch": 0.21, "learning_rate": 1.999875232827461e-05, "loss": 0.7245, "step": 555 }, { "epoch": 0.21, "learning_rate": 1.9998646191929004e-05, "loss": 0.6588, "step": 556 }, { "epoch": 0.21, "learning_rate": 1.999853572388648e-05, "loss": 0.6492, "step": 557 }, { "epoch": 0.21, "learning_rate": 1.9998420924194897e-05, "loss": 0.6621, "step": 558 }, { "epoch": 0.21, "learning_rate": 1.9998301792903992e-05, "loss": 0.6288, "step": 559 }, { "epoch": 0.21, "learning_rate": 1.999817833006538e-05, "loss": 0.6635, "step": 560 }, { "epoch": 0.21, "learning_rate": 1.9998050535732538e-05, "loss": 0.6216, "step": 561 }, { "epoch": 0.21, "learning_rate": 1.9997918409960836e-05, "loss": 0.604, "step": 562 }, { "epoch": 0.21, "learning_rate": 1.9997781952807516e-05, "loss": 0.6595, "step": 563 }, { "epoch": 0.21, "learning_rate": 1.9997641164331695e-05, "loss": 0.6541, "step": 564 }, { "epoch": 0.21, "learning_rate": 1.9997496044594366e-05, "loss": 0.6345, "step": 565 }, { "epoch": 0.21, "learning_rate": 1.999734659365839e-05, "loss": 0.6727, "step": 566 }, { "epoch": 0.21, "learning_rate": 1.999719281158853e-05, "loss": 0.659, "step": 567 }, { "epoch": 0.21, "learning_rate": 1.9997034698451396e-05, "loss": 0.661, "step": 568 }, { "epoch": 0.21, "learning_rate": 1.999687225431549e-05, "loss": 0.6451, "step": 569 }, { "epoch": 0.21, "learning_rate": 1.9996705479251192e-05, "loss": 0.6989, "step": 570 }, { "epoch": 0.22, "learning_rate": 1.9996534373330748e-05, "loss": 0.6834, "step": 571 }, { "epoch": 0.22, "learning_rate": 1.999635893662829e-05, "loss": 0.6171, "step": 572 }, { "epoch": 0.22, "learning_rate": 1.9996179169219818e-05, "loss": 0.6795, "step": 573 }, { "epoch": 0.22, "learning_rate": 1.999599507118322e-05, "loss": 0.73, "step": 574 }, { "epoch": 0.22, "learning_rate": 1.999580664259824e-05, "loss": 0.7009, "step": 575 }, { "epoch": 0.22, "learning_rate": 1.9995613883546523e-05, "loss": 0.6604, "step": 576 }, { "epoch": 0.22, "learning_rate": 1.999541679411157e-05, "loss": 0.5865, "step": 577 }, { "epoch": 0.22, "learning_rate": 1.999521537437877e-05, "loss": 0.6225, "step": 578 }, { "epoch": 0.22, "learning_rate": 1.999500962443538e-05, "loss": 0.6707, "step": 579 }, { "epoch": 0.22, "learning_rate": 1.999479954437054e-05, "loss": 0.5916, "step": 580 }, { "epoch": 0.22, "learning_rate": 1.9994585134275263e-05, "loss": 0.6645, "step": 581 }, { "epoch": 0.22, "learning_rate": 1.9994366394242432e-05, "loss": 0.7312, "step": 582 }, { "epoch": 0.22, "learning_rate": 1.999414332436682e-05, "loss": 0.6625, "step": 583 }, { "epoch": 0.22, "learning_rate": 1.999391592474506e-05, "loss": 0.6024, "step": 584 }, { "epoch": 0.22, "learning_rate": 1.9993684195475666e-05, "loss": 0.6253, "step": 585 }, { "epoch": 0.22, "learning_rate": 1.999344813665904e-05, "loss": 0.6462, "step": 586 }, { "epoch": 0.22, "learning_rate": 1.999320774839744e-05, "loss": 0.6374, "step": 587 }, { "epoch": 0.22, "learning_rate": 1.9992963030795013e-05, "loss": 0.6788, "step": 588 }, { "epoch": 0.22, "learning_rate": 1.999271398395778e-05, "loss": 0.63, "step": 589 }, { "epoch": 0.22, "learning_rate": 1.999246060799363e-05, "loss": 0.6917, "step": 590 }, { "epoch": 0.22, "learning_rate": 1.9992202903012332e-05, "loss": 0.6641, "step": 591 }, { "epoch": 0.22, "learning_rate": 1.999194086912554e-05, "loss": 0.612, "step": 592 }, { "epoch": 0.22, "learning_rate": 1.999167450644676e-05, "loss": 0.596, "step": 593 }, { "epoch": 0.22, "learning_rate": 1.9991403815091407e-05, "loss": 0.6317, "step": 594 }, { "epoch": 0.22, "learning_rate": 1.9991128795176732e-05, "loss": 0.6475, "step": 595 }, { "epoch": 0.22, "learning_rate": 1.99908494468219e-05, "loss": 0.6099, "step": 596 }, { "epoch": 0.23, "learning_rate": 1.9990565770147915e-05, "loss": 0.7131, "step": 597 }, { "epoch": 0.23, "learning_rate": 1.9990277765277687e-05, "loss": 0.6886, "step": 598 }, { "epoch": 0.23, "learning_rate": 1.998998543233598e-05, "loss": 0.6519, "step": 599 }, { "epoch": 0.23, "learning_rate": 1.9989688771449446e-05, "loss": 0.7065, "step": 600 }, { "epoch": 0.23, "learning_rate": 1.998938778274661e-05, "loss": 0.6909, "step": 601 }, { "epoch": 0.23, "learning_rate": 1.9989082466357854e-05, "loss": 0.6643, "step": 602 }, { "epoch": 0.23, "learning_rate": 1.9988772822415467e-05, "loss": 0.6093, "step": 603 }, { "epoch": 0.23, "learning_rate": 1.9988458851053584e-05, "loss": 0.6412, "step": 604 }, { "epoch": 0.23, "learning_rate": 1.998814055240823e-05, "loss": 0.6786, "step": 605 }, { "epoch": 0.23, "learning_rate": 1.9987817926617305e-05, "loss": 0.6599, "step": 606 }, { "epoch": 0.23, "learning_rate": 1.9987490973820573e-05, "loss": 0.7069, "step": 607 }, { "epoch": 0.23, "learning_rate": 1.9987159694159684e-05, "loss": 0.6929, "step": 608 }, { "epoch": 0.23, "learning_rate": 1.9986824087778155e-05, "loss": 0.6223, "step": 609 }, { "epoch": 0.23, "learning_rate": 1.9986484154821386e-05, "loss": 0.6412, "step": 610 }, { "epoch": 0.23, "learning_rate": 1.9986139895436633e-05, "loss": 0.6595, "step": 611 }, { "epoch": 0.23, "learning_rate": 1.998579130977305e-05, "loss": 0.6108, "step": 612 }, { "epoch": 0.23, "learning_rate": 1.998543839798165e-05, "loss": 0.6409, "step": 613 }, { "epoch": 0.23, "learning_rate": 1.9985081160215327e-05, "loss": 0.6487, "step": 614 }, { "epoch": 0.23, "learning_rate": 1.9984719596628843e-05, "loss": 0.6528, "step": 615 }, { "epoch": 0.23, "learning_rate": 1.9984353707378843e-05, "loss": 0.6647, "step": 616 }, { "epoch": 0.23, "learning_rate": 1.9983983492623832e-05, "loss": 0.583, "step": 617 }, { "epoch": 0.23, "learning_rate": 1.9983608952524204e-05, "loss": 0.6223, "step": 618 }, { "epoch": 0.23, "learning_rate": 1.998323008724222e-05, "loss": 0.6417, "step": 619 }, { "epoch": 0.23, "learning_rate": 1.9982846896942013e-05, "loss": 0.7113, "step": 620 }, { "epoch": 0.23, "learning_rate": 1.9982459381789593e-05, "loss": 0.6204, "step": 621 }, { "epoch": 0.23, "learning_rate": 1.9982067541952845e-05, "loss": 0.5851, "step": 622 }, { "epoch": 0.23, "learning_rate": 1.998167137760152e-05, "loss": 0.6407, "step": 623 }, { "epoch": 0.24, "learning_rate": 1.998127088890725e-05, "loss": 0.6456, "step": 624 }, { "epoch": 0.24, "learning_rate": 1.998086607604354e-05, "loss": 0.6585, "step": 625 }, { "epoch": 0.24, "learning_rate": 1.9980456939185764e-05, "loss": 0.6142, "step": 626 }, { "epoch": 0.24, "learning_rate": 1.998004347851117e-05, "loss": 0.6778, "step": 627 }, { "epoch": 0.24, "learning_rate": 1.997962569419889e-05, "loss": 0.6166, "step": 628 }, { "epoch": 0.24, "learning_rate": 1.9979203586429907e-05, "loss": 0.6363, "step": 629 }, { "epoch": 0.24, "learning_rate": 1.9978777155387096e-05, "loss": 0.6512, "step": 630 }, { "epoch": 0.24, "learning_rate": 1.9978346401255205e-05, "loss": 0.6651, "step": 631 }, { "epoch": 0.24, "learning_rate": 1.9977911324220838e-05, "loss": 0.6926, "step": 632 }, { "epoch": 0.24, "learning_rate": 1.9977471924472488e-05, "loss": 0.5882, "step": 633 }, { "epoch": 0.24, "learning_rate": 1.997702820220052e-05, "loss": 0.6522, "step": 634 }, { "epoch": 0.24, "learning_rate": 1.9976580157597158e-05, "loss": 0.6661, "step": 635 }, { "epoch": 0.24, "learning_rate": 1.9976127790856513e-05, "loss": 0.6424, "step": 636 }, { "epoch": 0.24, "learning_rate": 1.9975671102174562e-05, "loss": 0.6535, "step": 637 }, { "epoch": 0.24, "learning_rate": 1.9975210091749158e-05, "loss": 0.6463, "step": 638 }, { "epoch": 0.24, "learning_rate": 1.997474475978002e-05, "loss": 0.6017, "step": 639 }, { "epoch": 0.24, "learning_rate": 1.9974275106468743e-05, "loss": 0.6038, "step": 640 }, { "epoch": 0.24, "learning_rate": 1.9973801132018797e-05, "loss": 0.6565, "step": 641 }, { "epoch": 0.24, "learning_rate": 1.9973322836635517e-05, "loss": 0.6933, "step": 642 }, { "epoch": 0.24, "learning_rate": 1.997284022052612e-05, "loss": 0.6045, "step": 643 }, { "epoch": 0.24, "learning_rate": 1.9972353283899688e-05, "loss": 0.6524, "step": 644 }, { "epoch": 0.24, "learning_rate": 1.9971862026967165e-05, "loss": 0.6443, "step": 645 }, { "epoch": 0.24, "learning_rate": 1.9971366449941395e-05, "loss": 0.6339, "step": 646 }, { "epoch": 0.24, "learning_rate": 1.997086655303706e-05, "loss": 0.6158, "step": 647 }, { "epoch": 0.24, "learning_rate": 1.9970362336470744e-05, "loss": 0.6927, "step": 648 }, { "epoch": 0.24, "learning_rate": 1.996985380046087e-05, "loss": 0.6671, "step": 649 }, { "epoch": 0.25, "learning_rate": 1.996934094522777e-05, "loss": 0.696, "step": 650 }, { "epoch": 0.25, "learning_rate": 1.9968823770993614e-05, "loss": 0.6674, "step": 651 }, { "epoch": 0.25, "learning_rate": 1.9968302277982463e-05, "loss": 0.5677, "step": 652 }, { "epoch": 0.25, "learning_rate": 1.996777646642024e-05, "loss": 0.6794, "step": 653 }, { "epoch": 0.25, "learning_rate": 1.9967246336534738e-05, "loss": 0.6617, "step": 654 }, { "epoch": 0.25, "learning_rate": 1.996671188855563e-05, "loss": 0.6258, "step": 655 }, { "epoch": 0.25, "learning_rate": 1.9966173122714452e-05, "loss": 0.7216, "step": 656 }, { "epoch": 0.25, "learning_rate": 1.996563003924461e-05, "loss": 0.6594, "step": 657 }, { "epoch": 0.25, "learning_rate": 1.996508263838139e-05, "loss": 0.6587, "step": 658 }, { "epoch": 0.25, "learning_rate": 1.996453092036193e-05, "loss": 0.6015, "step": 659 }, { "epoch": 0.25, "learning_rate": 1.9963974885425267e-05, "loss": 0.6729, "step": 660 }, { "epoch": 0.25, "learning_rate": 1.9963414533812277e-05, "loss": 0.5427, "step": 661 }, { "epoch": 0.25, "learning_rate": 1.9962849865765723e-05, "loss": 0.6033, "step": 662 }, { "epoch": 0.25, "learning_rate": 1.9962280881530237e-05, "loss": 0.7118, "step": 663 }, { "epoch": 0.25, "learning_rate": 1.996170758135232e-05, "loss": 0.6513, "step": 664 }, { "epoch": 0.25, "learning_rate": 1.996112996548034e-05, "loss": 0.727, "step": 665 }, { "epoch": 0.25, "learning_rate": 1.9960548034164536e-05, "loss": 0.613, "step": 666 }, { "epoch": 0.25, "learning_rate": 1.9959961787657017e-05, "loss": 0.6215, "step": 667 }, { "epoch": 0.25, "learning_rate": 1.9959371226211768e-05, "loss": 0.6383, "step": 668 }, { "epoch": 0.25, "learning_rate": 1.995877635008463e-05, "loss": 0.6381, "step": 669 }, { "epoch": 0.25, "learning_rate": 1.995817715953332e-05, "loss": 0.65, "step": 670 }, { "epoch": 0.25, "learning_rate": 1.9957573654817427e-05, "loss": 0.6443, "step": 671 }, { "epoch": 0.25, "learning_rate": 1.9956965836198406e-05, "loss": 0.6261, "step": 672 }, { "epoch": 0.25, "learning_rate": 1.995635370393958e-05, "loss": 0.6056, "step": 673 }, { "epoch": 0.25, "learning_rate": 1.9955737258306147e-05, "loss": 0.6125, "step": 674 }, { "epoch": 0.25, "learning_rate": 1.995511649956516e-05, "loss": 0.5875, "step": 675 }, { "epoch": 0.25, "learning_rate": 1.9954491427985557e-05, "loss": 0.6753, "step": 676 }, { "epoch": 0.26, "learning_rate": 1.9953862043838134e-05, "loss": 0.6003, "step": 677 }, { "epoch": 0.26, "learning_rate": 1.9953228347395558e-05, "loss": 0.6015, "step": 678 }, { "epoch": 0.26, "learning_rate": 1.995259033893236e-05, "loss": 0.6146, "step": 679 }, { "epoch": 0.26, "learning_rate": 1.995194801872495e-05, "loss": 0.6219, "step": 680 }, { "epoch": 0.26, "learning_rate": 1.9951301387051596e-05, "loss": 0.6715, "step": 681 }, { "epoch": 0.26, "learning_rate": 1.9950650444192438e-05, "loss": 0.6469, "step": 682 }, { "epoch": 0.26, "learning_rate": 1.9949995190429486e-05, "loss": 0.6374, "step": 683 }, { "epoch": 0.26, "learning_rate": 1.994933562604661e-05, "loss": 0.7177, "step": 684 }, { "epoch": 0.26, "learning_rate": 1.9948671751329552e-05, "loss": 0.5698, "step": 685 }, { "epoch": 0.26, "learning_rate": 1.994800356656592e-05, "loss": 0.609, "step": 686 }, { "epoch": 0.26, "learning_rate": 1.9947331072045196e-05, "loss": 0.6777, "step": 687 }, { "epoch": 0.26, "learning_rate": 1.9946654268058722e-05, "loss": 0.6324, "step": 688 }, { "epoch": 0.26, "learning_rate": 1.9945973154899705e-05, "loss": 0.6385, "step": 689 }, { "epoch": 0.26, "learning_rate": 1.9945287732863224e-05, "loss": 0.6134, "step": 690 }, { "epoch": 0.26, "learning_rate": 1.9944598002246233e-05, "loss": 0.6814, "step": 691 }, { "epoch": 0.26, "learning_rate": 1.9943903963347524e-05, "loss": 0.6982, "step": 692 }, { "epoch": 0.26, "learning_rate": 1.994320561646779e-05, "loss": 0.6989, "step": 693 }, { "epoch": 0.26, "learning_rate": 1.9942502961909568e-05, "loss": 0.6437, "step": 694 }, { "epoch": 0.26, "learning_rate": 1.9941795999977267e-05, "loss": 0.5995, "step": 695 }, { "epoch": 0.26, "learning_rate": 1.9941084730977166e-05, "loss": 0.639, "step": 696 }, { "epoch": 0.26, "learning_rate": 1.9940369155217407e-05, "loss": 0.5977, "step": 697 }, { "epoch": 0.26, "learning_rate": 1.9939649273007998e-05, "loss": 0.5977, "step": 698 }, { "epoch": 0.26, "learning_rate": 1.9938925084660806e-05, "loss": 0.6938, "step": 699 }, { "epoch": 0.26, "learning_rate": 1.993819659048958e-05, "loss": 0.6899, "step": 700 }, { "epoch": 0.26, "learning_rate": 1.9937463790809918e-05, "loss": 0.6841, "step": 701 }, { "epoch": 0.26, "learning_rate": 1.9936726685939288e-05, "loss": 0.6351, "step": 702 }, { "epoch": 0.27, "learning_rate": 1.9935985276197033e-05, "loss": 0.6323, "step": 703 }, { "epoch": 0.27, "learning_rate": 1.9935239561904342e-05, "loss": 0.6361, "step": 704 }, { "epoch": 0.27, "learning_rate": 1.9934489543384287e-05, "loss": 0.6988, "step": 705 }, { "epoch": 0.27, "learning_rate": 1.9933735220961793e-05, "loss": 0.6563, "step": 706 }, { "epoch": 0.27, "learning_rate": 1.993297659496366e-05, "loss": 0.6069, "step": 707 }, { "epoch": 0.27, "learning_rate": 1.993221366571854e-05, "loss": 0.6951, "step": 708 }, { "epoch": 0.27, "learning_rate": 1.9931446433556954e-05, "loss": 0.6952, "step": 709 }, { "epoch": 0.27, "learning_rate": 1.9930674898811296e-05, "loss": 0.7115, "step": 710 }, { "epoch": 0.27, "learning_rate": 1.9929899061815815e-05, "loss": 0.6661, "step": 711 }, { "epoch": 0.27, "learning_rate": 1.9929118922906623e-05, "loss": 0.7067, "step": 712 }, { "epoch": 0.27, "learning_rate": 1.9928334482421696e-05, "loss": 0.6313, "step": 713 }, { "epoch": 0.27, "learning_rate": 1.992754574070088e-05, "loss": 0.6657, "step": 714 }, { "epoch": 0.27, "learning_rate": 1.9926752698085882e-05, "loss": 0.6084, "step": 715 }, { "epoch": 0.27, "learning_rate": 1.9925955354920265e-05, "loss": 0.6209, "step": 716 }, { "epoch": 0.27, "learning_rate": 1.9925153711549467e-05, "loss": 0.6297, "step": 717 }, { "epoch": 0.27, "learning_rate": 1.9924347768320774e-05, "loss": 0.5756, "step": 718 }, { "epoch": 0.27, "learning_rate": 1.992353752558335e-05, "loss": 0.6899, "step": 719 }, { "epoch": 0.27, "learning_rate": 1.9922722983688217e-05, "loss": 0.658, "step": 720 }, { "epoch": 0.27, "learning_rate": 1.9921904142988253e-05, "loss": 0.6761, "step": 721 }, { "epoch": 0.27, "learning_rate": 1.99210810038382e-05, "loss": 0.6813, "step": 722 }, { "epoch": 0.27, "learning_rate": 1.992025356659467e-05, "loss": 0.6237, "step": 723 }, { "epoch": 0.27, "learning_rate": 1.9919421831616136e-05, "loss": 0.6029, "step": 724 }, { "epoch": 0.27, "learning_rate": 1.9918585799262923e-05, "loss": 0.624, "step": 725 }, { "epoch": 0.27, "learning_rate": 1.9917745469897223e-05, "loss": 0.6371, "step": 726 }, { "epoch": 0.27, "learning_rate": 1.99169008438831e-05, "loss": 0.6748, "step": 727 }, { "epoch": 0.27, "learning_rate": 1.9916051921586456e-05, "loss": 0.6096, "step": 728 }, { "epoch": 0.27, "learning_rate": 1.991519870337508e-05, "loss": 0.5737, "step": 729 }, { "epoch": 0.28, "learning_rate": 1.9914341189618604e-05, "loss": 0.6581, "step": 730 }, { "epoch": 0.28, "learning_rate": 1.9913479380688527e-05, "loss": 0.675, "step": 731 }, { "epoch": 0.28, "learning_rate": 1.9912613276958214e-05, "loss": 0.6547, "step": 732 }, { "epoch": 0.28, "learning_rate": 1.9911742878802882e-05, "loss": 0.6789, "step": 733 }, { "epoch": 0.28, "learning_rate": 1.9910868186599615e-05, "loss": 0.6545, "step": 734 }, { "epoch": 0.28, "learning_rate": 1.9909989200727348e-05, "loss": 0.6098, "step": 735 }, { "epoch": 0.28, "learning_rate": 1.990910592156689e-05, "loss": 0.5909, "step": 736 }, { "epoch": 0.28, "learning_rate": 1.9908218349500898e-05, "loss": 0.6177, "step": 737 }, { "epoch": 0.28, "learning_rate": 1.9907326484913897e-05, "loss": 0.6553, "step": 738 }, { "epoch": 0.28, "learning_rate": 1.990643032819226e-05, "loss": 0.7017, "step": 739 }, { "epoch": 0.28, "learning_rate": 1.990552987972424e-05, "loss": 0.6177, "step": 740 }, { "epoch": 0.28, "learning_rate": 1.9904625139899927e-05, "loss": 0.6213, "step": 741 }, { "epoch": 0.28, "learning_rate": 1.9903716109111282e-05, "loss": 0.6445, "step": 742 }, { "epoch": 0.28, "learning_rate": 1.9902802787752127e-05, "loss": 0.6528, "step": 743 }, { "epoch": 0.28, "learning_rate": 1.9901885176218133e-05, "loss": 0.6573, "step": 744 }, { "epoch": 0.28, "learning_rate": 1.990096327490684e-05, "loss": 0.5567, "step": 745 }, { "epoch": 0.28, "learning_rate": 1.9900037084217637e-05, "loss": 0.6303, "step": 746 }, { "epoch": 0.28, "learning_rate": 1.9899106604551782e-05, "loss": 0.6794, "step": 747 }, { "epoch": 0.28, "learning_rate": 1.9898171836312383e-05, "loss": 0.6775, "step": 748 }, { "epoch": 0.28, "learning_rate": 1.9897232779904404e-05, "loss": 0.6434, "step": 749 }, { "epoch": 0.28, "learning_rate": 1.9896289435734678e-05, "loss": 0.5806, "step": 750 }, { "epoch": 0.28, "learning_rate": 1.989534180421188e-05, "loss": 0.5933, "step": 751 }, { "epoch": 0.28, "learning_rate": 1.9894389885746555e-05, "loss": 0.6757, "step": 752 }, { "epoch": 0.28, "learning_rate": 1.9893433680751105e-05, "loss": 0.6151, "step": 753 }, { "epoch": 0.28, "learning_rate": 1.989247318963978e-05, "loss": 0.6393, "step": 754 }, { "epoch": 0.28, "learning_rate": 1.989150841282869e-05, "loss": 0.6069, "step": 755 }, { "epoch": 0.29, "learning_rate": 1.989053935073581e-05, "loss": 0.6033, "step": 756 }, { "epoch": 0.29, "learning_rate": 1.9889566003780962e-05, "loss": 0.6101, "step": 757 }, { "epoch": 0.29, "learning_rate": 1.9888588372385826e-05, "loss": 0.6613, "step": 758 }, { "epoch": 0.29, "learning_rate": 1.9887606456973945e-05, "loss": 0.6175, "step": 759 }, { "epoch": 0.29, "learning_rate": 1.9886620257970706e-05, "loss": 0.6354, "step": 760 }, { "epoch": 0.29, "learning_rate": 1.9885629775803362e-05, "loss": 0.686, "step": 761 }, { "epoch": 0.29, "learning_rate": 1.9884635010901018e-05, "loss": 0.5862, "step": 762 }, { "epoch": 0.29, "learning_rate": 1.988363596369463e-05, "loss": 0.5852, "step": 763 }, { "epoch": 0.29, "learning_rate": 1.988263263461702e-05, "loss": 0.6776, "step": 764 }, { "epoch": 0.29, "learning_rate": 1.9881625024102856e-05, "loss": 0.6592, "step": 765 }, { "epoch": 0.29, "learning_rate": 1.9880613132588662e-05, "loss": 0.606, "step": 766 }, { "epoch": 0.29, "learning_rate": 1.9879596960512822e-05, "loss": 0.6655, "step": 767 }, { "epoch": 0.29, "learning_rate": 1.9878576508315565e-05, "loss": 0.5799, "step": 768 }, { "epoch": 0.29, "learning_rate": 1.9877551776438982e-05, "loss": 0.6141, "step": 769 }, { "epoch": 0.29, "learning_rate": 1.9876522765327016e-05, "loss": 0.6803, "step": 770 }, { "epoch": 0.29, "learning_rate": 1.9875489475425468e-05, "loss": 0.6376, "step": 771 }, { "epoch": 0.29, "learning_rate": 1.9874451907181984e-05, "loss": 0.5899, "step": 772 }, { "epoch": 0.29, "learning_rate": 1.9873410061046067e-05, "loss": 0.681, "step": 773 }, { "epoch": 0.29, "learning_rate": 1.9872363937469075e-05, "loss": 0.6475, "step": 774 }, { "epoch": 0.29, "learning_rate": 1.987131353690422e-05, "loss": 0.7142, "step": 775 }, { "epoch": 0.29, "learning_rate": 1.9870258859806567e-05, "loss": 0.6474, "step": 776 }, { "epoch": 0.29, "learning_rate": 1.9869199906633025e-05, "loss": 0.6209, "step": 777 }, { "epoch": 0.29, "learning_rate": 1.986813667784237e-05, "loss": 0.6285, "step": 778 }, { "epoch": 0.29, "learning_rate": 1.986706917389522e-05, "loss": 0.6218, "step": 779 }, { "epoch": 0.29, "learning_rate": 1.9865997395254045e-05, "loss": 0.645, "step": 780 }, { "epoch": 0.29, "learning_rate": 1.986492134238318e-05, "loss": 0.6562, "step": 781 }, { "epoch": 0.29, "learning_rate": 1.986384101574879e-05, "loss": 0.6662, "step": 782 }, { "epoch": 0.3, "learning_rate": 1.9862756415818904e-05, "loss": 0.6004, "step": 783 }, { "epoch": 0.3, "learning_rate": 1.986166754306341e-05, "loss": 0.6716, "step": 784 }, { "epoch": 0.3, "learning_rate": 1.9860574397954032e-05, "loss": 0.649, "step": 785 }, { "epoch": 0.3, "learning_rate": 1.9859476980964353e-05, "loss": 0.644, "step": 786 }, { "epoch": 0.3, "learning_rate": 1.9858375292569806e-05, "loss": 0.6995, "step": 787 }, { "epoch": 0.3, "learning_rate": 1.9857269333247673e-05, "loss": 0.6257, "step": 788 }, { "epoch": 0.3, "learning_rate": 1.9856159103477085e-05, "loss": 0.6373, "step": 789 }, { "epoch": 0.3, "learning_rate": 1.985504460373903e-05, "loss": 0.6432, "step": 790 }, { "epoch": 0.3, "learning_rate": 1.9853925834516333e-05, "loss": 0.603, "step": 791 }, { "epoch": 0.3, "learning_rate": 1.9852802796293684e-05, "loss": 0.6382, "step": 792 }, { "epoch": 0.3, "learning_rate": 1.985167548955761e-05, "loss": 0.615, "step": 793 }, { "epoch": 0.3, "learning_rate": 1.9850543914796498e-05, "loss": 0.6352, "step": 794 }, { "epoch": 0.3, "learning_rate": 1.984940807250057e-05, "loss": 0.5961, "step": 795 }, { "epoch": 0.3, "learning_rate": 1.9848267963161914e-05, "loss": 0.6455, "step": 796 }, { "epoch": 0.3, "learning_rate": 1.9847123587274452e-05, "loss": 0.6511, "step": 797 }, { "epoch": 0.3, "learning_rate": 1.984597494533396e-05, "loss": 0.6264, "step": 798 }, { "epoch": 0.3, "learning_rate": 1.9844822037838067e-05, "loss": 0.6442, "step": 799 }, { "epoch": 0.3, "learning_rate": 1.984366486528624e-05, "loss": 0.6565, "step": 800 }, { "epoch": 0.3, "learning_rate": 1.9842503428179797e-05, "loss": 0.5806, "step": 801 }, { "epoch": 0.3, "learning_rate": 1.984133772702191e-05, "loss": 0.6464, "step": 802 }, { "epoch": 0.3, "learning_rate": 1.98401677623176e-05, "loss": 0.5672, "step": 803 }, { "epoch": 0.3, "learning_rate": 1.9838993534573716e-05, "loss": 0.6375, "step": 804 }, { "epoch": 0.3, "learning_rate": 1.9837815044298972e-05, "loss": 0.6354, "step": 805 }, { "epoch": 0.3, "learning_rate": 1.9836632292003926e-05, "loss": 0.5942, "step": 806 }, { "epoch": 0.3, "learning_rate": 1.9835445278200978e-05, "loss": 0.5984, "step": 807 }, { "epoch": 0.3, "learning_rate": 1.983425400340438e-05, "loss": 0.6909, "step": 808 }, { "epoch": 0.31, "learning_rate": 1.9833058468130214e-05, "loss": 0.6768, "step": 809 }, { "epoch": 0.31, "learning_rate": 1.9831858672896432e-05, "loss": 0.6719, "step": 810 }, { "epoch": 0.31, "learning_rate": 1.9830654618222813e-05, "loss": 0.5971, "step": 811 }, { "epoch": 0.31, "learning_rate": 1.9829446304630994e-05, "loss": 0.6165, "step": 812 }, { "epoch": 0.31, "learning_rate": 1.9828233732644442e-05, "loss": 0.7008, "step": 813 }, { "epoch": 0.31, "learning_rate": 1.9827016902788483e-05, "loss": 0.6418, "step": 814 }, { "epoch": 0.31, "learning_rate": 1.982579581559028e-05, "loss": 0.6877, "step": 815 }, { "epoch": 0.31, "learning_rate": 1.9824570471578845e-05, "loss": 0.6401, "step": 816 }, { "epoch": 0.31, "learning_rate": 1.9823340871285032e-05, "loss": 0.6538, "step": 817 }, { "epoch": 0.31, "learning_rate": 1.9822107015241535e-05, "loss": 0.6442, "step": 818 }, { "epoch": 0.31, "learning_rate": 1.9820868903982893e-05, "loss": 0.6706, "step": 819 }, { "epoch": 0.31, "learning_rate": 1.9819626538045503e-05, "loss": 0.6071, "step": 820 }, { "epoch": 0.31, "learning_rate": 1.981837991796758e-05, "loss": 0.6286, "step": 821 }, { "epoch": 0.31, "learning_rate": 1.9817129044289204e-05, "loss": 0.5909, "step": 822 }, { "epoch": 0.31, "learning_rate": 1.9815873917552283e-05, "loss": 0.6974, "step": 823 }, { "epoch": 0.31, "learning_rate": 1.981461453830058e-05, "loss": 0.5908, "step": 824 }, { "epoch": 0.31, "learning_rate": 1.9813350907079684e-05, "loss": 0.6362, "step": 825 }, { "epoch": 0.31, "learning_rate": 1.9812083024437046e-05, "loss": 0.6904, "step": 826 }, { "epoch": 0.31, "learning_rate": 1.9810810890921943e-05, "loss": 0.6307, "step": 827 }, { "epoch": 0.31, "learning_rate": 1.98095345070855e-05, "loss": 0.5889, "step": 828 }, { "epoch": 0.31, "learning_rate": 1.9808253873480687e-05, "loss": 0.6158, "step": 829 }, { "epoch": 0.31, "learning_rate": 1.9806968990662306e-05, "loss": 0.5832, "step": 830 }, { "epoch": 0.31, "learning_rate": 1.9805679859187e-05, "loss": 0.6336, "step": 831 }, { "epoch": 0.31, "learning_rate": 1.9804386479613268e-05, "loss": 0.629, "step": 832 }, { "epoch": 0.31, "learning_rate": 1.9803088852501433e-05, "loss": 0.6093, "step": 833 }, { "epoch": 0.31, "learning_rate": 1.9801786978413665e-05, "loss": 0.7032, "step": 834 }, { "epoch": 0.31, "learning_rate": 1.9800480857913973e-05, "loss": 0.6948, "step": 835 }, { "epoch": 0.32, "learning_rate": 1.97991704915682e-05, "loss": 0.6998, "step": 836 }, { "epoch": 0.32, "learning_rate": 1.9797855879944042e-05, "loss": 0.5997, "step": 837 }, { "epoch": 0.32, "learning_rate": 1.9796537023611024e-05, "loss": 0.6216, "step": 838 }, { "epoch": 0.32, "learning_rate": 1.979521392314051e-05, "loss": 0.6009, "step": 839 }, { "epoch": 0.32, "learning_rate": 1.9793886579105705e-05, "loss": 0.5991, "step": 840 }, { "epoch": 0.32, "learning_rate": 1.979255499208165e-05, "loss": 0.6613, "step": 841 }, { "epoch": 0.32, "learning_rate": 1.979121916264523e-05, "loss": 0.6572, "step": 842 }, { "epoch": 0.32, "learning_rate": 1.978987909137516e-05, "loss": 0.601, "step": 843 }, { "epoch": 0.32, "learning_rate": 1.9788534778852004e-05, "loss": 0.6146, "step": 844 }, { "epoch": 0.32, "learning_rate": 1.978718622565815e-05, "loss": 0.628, "step": 845 }, { "epoch": 0.32, "learning_rate": 1.9785833432377834e-05, "loss": 0.604, "step": 846 }, { "epoch": 0.32, "learning_rate": 1.9784476399597118e-05, "loss": 0.6844, "step": 847 }, { "epoch": 0.32, "learning_rate": 1.9783115127903908e-05, "loss": 0.6108, "step": 848 }, { "epoch": 0.32, "learning_rate": 1.9781749617887955e-05, "loss": 0.6419, "step": 849 }, { "epoch": 0.32, "learning_rate": 1.9780379870140823e-05, "loss": 0.6106, "step": 850 }, { "epoch": 0.32, "learning_rate": 1.9779005885255938e-05, "loss": 0.6897, "step": 851 }, { "epoch": 0.32, "learning_rate": 1.977762766382854e-05, "loss": 0.6248, "step": 852 }, { "epoch": 0.32, "learning_rate": 1.9776245206455718e-05, "loss": 0.6242, "step": 853 }, { "epoch": 0.32, "learning_rate": 1.977485851373639e-05, "loss": 0.6415, "step": 854 }, { "epoch": 0.32, "learning_rate": 1.9773467586271313e-05, "loss": 0.7093, "step": 855 }, { "epoch": 0.32, "learning_rate": 1.9772072424663075e-05, "loss": 0.6375, "step": 856 }, { "epoch": 0.32, "learning_rate": 1.97706730295161e-05, "loss": 0.6506, "step": 857 }, { "epoch": 0.32, "learning_rate": 1.976926940143664e-05, "loss": 0.6408, "step": 858 }, { "epoch": 0.32, "learning_rate": 1.9767861541032796e-05, "loss": 0.6485, "step": 859 }, { "epoch": 0.32, "learning_rate": 1.9766449448914487e-05, "loss": 0.6435, "step": 860 }, { "epoch": 0.32, "learning_rate": 1.976503312569347e-05, "loss": 0.5898, "step": 861 }, { "epoch": 0.33, "learning_rate": 1.9763612571983343e-05, "loss": 0.6382, "step": 862 }, { "epoch": 0.33, "learning_rate": 1.9762187788399524e-05, "loss": 0.6727, "step": 863 }, { "epoch": 0.33, "learning_rate": 1.9760758775559275e-05, "loss": 0.6228, "step": 864 }, { "epoch": 0.33, "learning_rate": 1.9759325534081677e-05, "loss": 0.6213, "step": 865 }, { "epoch": 0.33, "learning_rate": 1.9757888064587658e-05, "loss": 0.5906, "step": 866 }, { "epoch": 0.33, "learning_rate": 1.975644636769997e-05, "loss": 0.6266, "step": 867 }, { "epoch": 0.33, "learning_rate": 1.975500044404319e-05, "loss": 0.5892, "step": 868 }, { "epoch": 0.33, "learning_rate": 1.975355029424374e-05, "loss": 0.5682, "step": 869 }, { "epoch": 0.33, "learning_rate": 1.9752095918929863e-05, "loss": 0.6273, "step": 870 }, { "epoch": 0.33, "learning_rate": 1.975063731873164e-05, "loss": 0.6386, "step": 871 }, { "epoch": 0.33, "learning_rate": 1.974917449428097e-05, "loss": 0.6067, "step": 872 }, { "epoch": 0.33, "learning_rate": 1.97477074462116e-05, "loss": 0.6233, "step": 873 }, { "epoch": 0.33, "learning_rate": 1.9746236175159084e-05, "loss": 0.629, "step": 874 }, { "epoch": 0.33, "learning_rate": 1.9744760681760832e-05, "loss": 0.5907, "step": 875 }, { "epoch": 0.33, "learning_rate": 1.9743280966656063e-05, "loss": 0.6474, "step": 876 }, { "epoch": 0.33, "learning_rate": 1.974179703048583e-05, "loss": 0.6269, "step": 877 }, { "epoch": 0.33, "learning_rate": 1.974030887389302e-05, "loss": 0.6124, "step": 878 }, { "epoch": 0.33, "learning_rate": 1.973881649752234e-05, "loss": 0.6213, "step": 879 }, { "epoch": 0.33, "learning_rate": 1.9737319902020334e-05, "loss": 0.5183, "step": 880 }, { "epoch": 0.33, "learning_rate": 1.9735819088035367e-05, "loss": 0.636, "step": 881 }, { "epoch": 0.33, "learning_rate": 1.9734314056217638e-05, "loss": 0.6803, "step": 882 }, { "epoch": 0.33, "learning_rate": 1.9732804807219163e-05, "loss": 0.6499, "step": 883 }, { "epoch": 0.33, "learning_rate": 1.9731291341693793e-05, "loss": 0.6115, "step": 884 }, { "epoch": 0.33, "learning_rate": 1.972977366029721e-05, "loss": 0.6002, "step": 885 }, { "epoch": 0.33, "learning_rate": 1.9728251763686912e-05, "loss": 0.6118, "step": 886 }, { "epoch": 0.33, "learning_rate": 1.972672565252223e-05, "loss": 0.6517, "step": 887 }, { "epoch": 0.33, "learning_rate": 1.9725195327464315e-05, "loss": 0.6289, "step": 888 }, { "epoch": 0.34, "learning_rate": 1.9723660789176148e-05, "loss": 0.6582, "step": 889 }, { "epoch": 0.34, "learning_rate": 1.972212203832254e-05, "loss": 0.6618, "step": 890 }, { "epoch": 0.34, "learning_rate": 1.9720579075570116e-05, "loss": 0.6765, "step": 891 }, { "epoch": 0.34, "learning_rate": 1.971903190158733e-05, "loss": 0.6591, "step": 892 }, { "epoch": 0.34, "learning_rate": 1.9717480517044463e-05, "loss": 0.6328, "step": 893 }, { "epoch": 0.34, "learning_rate": 1.9715924922613624e-05, "loss": 0.6322, "step": 894 }, { "epoch": 0.34, "learning_rate": 1.971436511896874e-05, "loss": 0.6331, "step": 895 }, { "epoch": 0.34, "learning_rate": 1.971280110678555e-05, "loss": 0.6214, "step": 896 }, { "epoch": 0.34, "learning_rate": 1.9711232886741638e-05, "loss": 0.6178, "step": 897 }, { "epoch": 0.34, "learning_rate": 1.9709660459516403e-05, "loss": 0.6676, "step": 898 }, { "epoch": 0.34, "learning_rate": 1.9708083825791064e-05, "loss": 0.6305, "step": 899 }, { "epoch": 0.34, "learning_rate": 1.970650298624866e-05, "loss": 0.6519, "step": 900 }, { "epoch": 0.34, "learning_rate": 1.9704917941574053e-05, "loss": 0.6339, "step": 901 }, { "epoch": 0.34, "learning_rate": 1.970332869245394e-05, "loss": 0.5588, "step": 902 }, { "epoch": 0.34, "learning_rate": 1.970173523957682e-05, "loss": 0.6295, "step": 903 }, { "epoch": 0.34, "learning_rate": 1.9700137583633022e-05, "loss": 0.6134, "step": 904 }, { "epoch": 0.34, "learning_rate": 1.96985357253147e-05, "loss": 0.5883, "step": 905 }, { "epoch": 0.34, "learning_rate": 1.9696929665315825e-05, "loss": 0.5914, "step": 906 }, { "epoch": 0.34, "learning_rate": 1.9695319404332183e-05, "loss": 0.5837, "step": 907 }, { "epoch": 0.34, "learning_rate": 1.9693704943061386e-05, "loss": 0.607, "step": 908 }, { "epoch": 0.34, "learning_rate": 1.9692086282202866e-05, "loss": 0.5933, "step": 909 }, { "epoch": 0.34, "learning_rate": 1.9690463422457875e-05, "loss": 0.5822, "step": 910 }, { "epoch": 0.34, "learning_rate": 1.9688836364529474e-05, "loss": 0.6442, "step": 911 }, { "epoch": 0.34, "learning_rate": 1.968720510912256e-05, "loss": 0.6351, "step": 912 }, { "epoch": 0.34, "learning_rate": 1.968556965694383e-05, "loss": 0.6116, "step": 913 }, { "epoch": 0.34, "learning_rate": 1.968393000870182e-05, "loss": 0.6876, "step": 914 }, { "epoch": 0.35, "learning_rate": 1.9682286165106855e-05, "loss": 0.6197, "step": 915 }, { "epoch": 0.35, "learning_rate": 1.9680638126871112e-05, "loss": 0.5864, "step": 916 }, { "epoch": 0.35, "learning_rate": 1.9678985894708557e-05, "loss": 0.6406, "step": 917 }, { "epoch": 0.35, "learning_rate": 1.967732946933499e-05, "loss": 0.6652, "step": 918 }, { "epoch": 0.35, "learning_rate": 1.9675668851468016e-05, "loss": 0.6506, "step": 919 }, { "epoch": 0.35, "learning_rate": 1.9674004041827066e-05, "loss": 0.6175, "step": 920 }, { "epoch": 0.35, "learning_rate": 1.967233504113338e-05, "loss": 0.6178, "step": 921 }, { "epoch": 0.35, "learning_rate": 1.967066185011002e-05, "loss": 0.6057, "step": 922 }, { "epoch": 0.35, "learning_rate": 1.9668984469481855e-05, "loss": 0.6255, "step": 923 }, { "epoch": 0.35, "learning_rate": 1.966730289997558e-05, "loss": 0.6109, "step": 924 }, { "epoch": 0.35, "learning_rate": 1.966561714231969e-05, "loss": 0.5919, "step": 925 }, { "epoch": 0.35, "learning_rate": 1.9663927197244507e-05, "loss": 0.6289, "step": 926 }, { "epoch": 0.35, "learning_rate": 1.9662233065482165e-05, "loss": 0.6424, "step": 927 }, { "epoch": 0.35, "learning_rate": 1.966053474776661e-05, "loss": 0.5499, "step": 928 }, { "epoch": 0.35, "learning_rate": 1.96588322448336e-05, "loss": 0.6206, "step": 929 }, { "epoch": 0.35, "learning_rate": 1.9657125557420707e-05, "loss": 0.6839, "step": 930 }, { "epoch": 0.35, "learning_rate": 1.9655414686267317e-05, "loss": 0.6503, "step": 931 }, { "epoch": 0.35, "learning_rate": 1.965369963211463e-05, "loss": 0.612, "step": 932 }, { "epoch": 0.35, "learning_rate": 1.965198039570565e-05, "loss": 0.7065, "step": 933 }, { "epoch": 0.35, "learning_rate": 1.9650256977785205e-05, "loss": 0.6378, "step": 934 }, { "epoch": 0.35, "learning_rate": 1.9648529379099925e-05, "loss": 0.628, "step": 935 }, { "epoch": 0.35, "learning_rate": 1.964679760039826e-05, "loss": 0.6246, "step": 936 }, { "epoch": 0.35, "learning_rate": 1.9645061642430458e-05, "loss": 0.6881, "step": 937 }, { "epoch": 0.35, "learning_rate": 1.9643321505948588e-05, "loss": 0.6283, "step": 938 }, { "epoch": 0.35, "learning_rate": 1.9641577191706526e-05, "loss": 0.6094, "step": 939 }, { "epoch": 0.35, "learning_rate": 1.9639828700459965e-05, "loss": 0.6306, "step": 940 }, { "epoch": 0.35, "learning_rate": 1.9638076032966392e-05, "loss": 0.6737, "step": 941 }, { "epoch": 0.36, "learning_rate": 1.9636319189985115e-05, "loss": 0.6093, "step": 942 }, { "epoch": 0.36, "learning_rate": 1.9634558172277254e-05, "loss": 0.6396, "step": 943 }, { "epoch": 0.36, "learning_rate": 1.9632792980605724e-05, "loss": 0.6099, "step": 944 }, { "epoch": 0.36, "learning_rate": 1.9631023615735255e-05, "loss": 0.6187, "step": 945 }, { "epoch": 0.36, "learning_rate": 1.9629250078432393e-05, "loss": 0.6365, "step": 946 }, { "epoch": 0.36, "learning_rate": 1.9627472369465483e-05, "loss": 0.611, "step": 947 }, { "epoch": 0.36, "learning_rate": 1.9625690489604678e-05, "loss": 0.5708, "step": 948 }, { "epoch": 0.36, "learning_rate": 1.9623904439621936e-05, "loss": 0.6238, "step": 949 }, { "epoch": 0.36, "learning_rate": 1.9622114220291028e-05, "loss": 0.5467, "step": 950 }, { "epoch": 0.36, "learning_rate": 1.9620319832387525e-05, "loss": 0.613, "step": 951 }, { "epoch": 0.36, "learning_rate": 1.961852127668881e-05, "loss": 0.6276, "step": 952 }, { "epoch": 0.36, "learning_rate": 1.9616718553974063e-05, "loss": 0.6162, "step": 953 }, { "epoch": 0.36, "learning_rate": 1.961491166502428e-05, "loss": 0.6049, "step": 954 }, { "epoch": 0.36, "learning_rate": 1.9613100610622254e-05, "loss": 0.6162, "step": 955 }, { "epoch": 0.36, "learning_rate": 1.961128539155259e-05, "loss": 0.5948, "step": 956 }, { "epoch": 0.36, "learning_rate": 1.9609466008601683e-05, "loss": 0.6314, "step": 957 }, { "epoch": 0.36, "learning_rate": 1.9607642462557747e-05, "loss": 0.6509, "step": 958 }, { "epoch": 0.36, "learning_rate": 1.9605814754210794e-05, "loss": 0.6328, "step": 959 }, { "epoch": 0.36, "learning_rate": 1.9603982884352638e-05, "loss": 0.6536, "step": 960 }, { "epoch": 0.36, "learning_rate": 1.9602146853776894e-05, "loss": 0.6196, "step": 961 }, { "epoch": 0.36, "learning_rate": 1.960030666327899e-05, "loss": 0.6129, "step": 962 }, { "epoch": 0.36, "learning_rate": 1.9598462313656143e-05, "loss": 0.5426, "step": 963 }, { "epoch": 0.36, "learning_rate": 1.959661380570738e-05, "loss": 0.6751, "step": 964 }, { "epoch": 0.36, "learning_rate": 1.9594761140233525e-05, "loss": 0.6845, "step": 965 }, { "epoch": 0.36, "learning_rate": 1.9592904318037203e-05, "loss": 0.6396, "step": 966 }, { "epoch": 0.36, "learning_rate": 1.959104333992285e-05, "loss": 0.5904, "step": 967 }, { "epoch": 0.37, "learning_rate": 1.9589178206696685e-05, "loss": 0.6294, "step": 968 }, { "epoch": 0.37, "learning_rate": 1.9587308919166744e-05, "loss": 0.6342, "step": 969 }, { "epoch": 0.37, "learning_rate": 1.9585435478142855e-05, "loss": 0.6435, "step": 970 }, { "epoch": 0.37, "learning_rate": 1.9583557884436637e-05, "loss": 0.6708, "step": 971 }, { "epoch": 0.37, "learning_rate": 1.9581676138861525e-05, "loss": 0.643, "step": 972 }, { "epoch": 0.37, "learning_rate": 1.9579790242232742e-05, "loss": 0.6507, "step": 973 }, { "epoch": 0.37, "learning_rate": 1.9577900195367313e-05, "loss": 0.5919, "step": 974 }, { "epoch": 0.37, "learning_rate": 1.957600599908406e-05, "loss": 0.6168, "step": 975 }, { "epoch": 0.37, "learning_rate": 1.95741076542036e-05, "loss": 0.6355, "step": 976 }, { "epoch": 0.37, "learning_rate": 1.9572205161548356e-05, "loss": 0.6356, "step": 977 }, { "epoch": 0.37, "learning_rate": 1.9570298521942535e-05, "loss": 0.6173, "step": 978 }, { "epoch": 0.37, "learning_rate": 1.9568387736212145e-05, "loss": 0.5859, "step": 979 }, { "epoch": 0.37, "learning_rate": 1.9566472805185003e-05, "loss": 0.5984, "step": 980 }, { "epoch": 0.37, "learning_rate": 1.9564553729690702e-05, "loss": 0.6715, "step": 981 }, { "epoch": 0.37, "learning_rate": 1.9562630510560646e-05, "loss": 0.6576, "step": 982 }, { "epoch": 0.37, "learning_rate": 1.9560703148628023e-05, "loss": 0.6287, "step": 983 }, { "epoch": 0.37, "learning_rate": 1.9558771644727826e-05, "loss": 0.6295, "step": 984 }, { "epoch": 0.37, "learning_rate": 1.9556835999696834e-05, "loss": 0.6187, "step": 985 }, { "epoch": 0.37, "learning_rate": 1.955489621437362e-05, "loss": 0.6896, "step": 986 }, { "epoch": 0.37, "learning_rate": 1.955295228959856e-05, "loss": 0.6272, "step": 987 }, { "epoch": 0.37, "learning_rate": 1.955100422621382e-05, "loss": 0.6199, "step": 988 }, { "epoch": 0.37, "learning_rate": 1.9549052025063343e-05, "loss": 0.6402, "step": 989 }, { "epoch": 0.37, "learning_rate": 1.954709568699289e-05, "loss": 0.6078, "step": 990 }, { "epoch": 0.37, "learning_rate": 1.954513521285e-05, "loss": 0.6008, "step": 991 }, { "epoch": 0.37, "learning_rate": 1.9543170603484003e-05, "loss": 0.6867, "step": 992 }, { "epoch": 0.37, "learning_rate": 1.9541201859746025e-05, "loss": 0.6471, "step": 993 }, { "epoch": 0.37, "learning_rate": 1.953922898248898e-05, "loss": 0.6288, "step": 994 }, { "epoch": 0.38, "learning_rate": 1.9537251972567576e-05, "loss": 0.5959, "step": 995 }, { "epoch": 0.38, "learning_rate": 1.9535270830838308e-05, "loss": 0.5855, "step": 996 }, { "epoch": 0.38, "learning_rate": 1.9533285558159466e-05, "loss": 0.6158, "step": 997 }, { "epoch": 0.38, "learning_rate": 1.9531296155391128e-05, "loss": 0.6122, "step": 998 }, { "epoch": 0.38, "learning_rate": 1.952930262339515e-05, "loss": 0.6489, "step": 999 }, { "epoch": 0.38, "learning_rate": 1.9527304963035196e-05, "loss": 0.6076, "step": 1000 }, { "epoch": 0.38, "learning_rate": 1.9525303175176706e-05, "loss": 0.6417, "step": 1001 }, { "epoch": 0.38, "learning_rate": 1.9523297260686907e-05, "loss": 0.6292, "step": 1002 }, { "epoch": 0.38, "learning_rate": 1.952128722043483e-05, "loss": 0.5625, "step": 1003 }, { "epoch": 0.38, "learning_rate": 1.9519273055291266e-05, "loss": 0.6557, "step": 1004 }, { "epoch": 0.38, "learning_rate": 1.951725476612882e-05, "loss": 0.5336, "step": 1005 }, { "epoch": 0.38, "learning_rate": 1.9515232353821867e-05, "loss": 0.6135, "step": 1006 }, { "epoch": 0.38, "learning_rate": 1.9513205819246575e-05, "loss": 0.64, "step": 1007 }, { "epoch": 0.38, "learning_rate": 1.9511175163280893e-05, "loss": 0.646, "step": 1008 }, { "epoch": 0.38, "learning_rate": 1.9509140386804563e-05, "loss": 0.6151, "step": 1009 }, { "epoch": 0.38, "learning_rate": 1.9507101490699106e-05, "loss": 0.6555, "step": 1010 }, { "epoch": 0.38, "learning_rate": 1.950505847584783e-05, "loss": 0.5825, "step": 1011 }, { "epoch": 0.38, "learning_rate": 1.9503011343135828e-05, "loss": 0.6066, "step": 1012 }, { "epoch": 0.38, "learning_rate": 1.9500960093449973e-05, "loss": 0.5653, "step": 1013 }, { "epoch": 0.38, "learning_rate": 1.9498904727678925e-05, "loss": 0.6141, "step": 1014 }, { "epoch": 0.38, "learning_rate": 1.9496845246713127e-05, "loss": 0.5695, "step": 1015 }, { "epoch": 0.38, "learning_rate": 1.9494781651444806e-05, "loss": 0.6229, "step": 1016 }, { "epoch": 0.38, "learning_rate": 1.949271394276797e-05, "loss": 0.5709, "step": 1017 }, { "epoch": 0.38, "learning_rate": 1.9490642121578407e-05, "loss": 0.6873, "step": 1018 }, { "epoch": 0.38, "learning_rate": 1.948856618877369e-05, "loss": 0.5884, "step": 1019 }, { "epoch": 0.38, "learning_rate": 1.9486486145253173e-05, "loss": 0.6168, "step": 1020 }, { "epoch": 0.38, "learning_rate": 1.9484401991917993e-05, "loss": 0.5277, "step": 1021 }, { "epoch": 0.39, "learning_rate": 1.948231372967105e-05, "loss": 0.5728, "step": 1022 }, { "epoch": 0.39, "learning_rate": 1.9480221359417057e-05, "loss": 0.6359, "step": 1023 }, { "epoch": 0.39, "learning_rate": 1.9478124882062474e-05, "loss": 0.5676, "step": 1024 }, { "epoch": 0.39, "learning_rate": 1.9476024298515563e-05, "loss": 0.6046, "step": 1025 }, { "epoch": 0.39, "learning_rate": 1.9473919609686352e-05, "loss": 0.6278, "step": 1026 }, { "epoch": 0.39, "learning_rate": 1.9471810816486655e-05, "loss": 0.6201, "step": 1027 }, { "epoch": 0.39, "learning_rate": 1.9469697919830058e-05, "loss": 0.6191, "step": 1028 }, { "epoch": 0.39, "learning_rate": 1.9467580920631926e-05, "loss": 0.5933, "step": 1029 }, { "epoch": 0.39, "learning_rate": 1.9465459819809406e-05, "loss": 0.6608, "step": 1030 }, { "epoch": 0.39, "learning_rate": 1.946333461828142e-05, "loss": 0.6343, "step": 1031 }, { "epoch": 0.39, "learning_rate": 1.9461205316968666e-05, "loss": 0.5628, "step": 1032 }, { "epoch": 0.39, "learning_rate": 1.9459071916793614e-05, "loss": 0.5684, "step": 1033 }, { "epoch": 0.39, "learning_rate": 1.9456934418680515e-05, "loss": 0.5582, "step": 1034 }, { "epoch": 0.39, "learning_rate": 1.9454792823555397e-05, "loss": 0.6513, "step": 1035 }, { "epoch": 0.39, "learning_rate": 1.945264713234605e-05, "loss": 0.5805, "step": 1036 }, { "epoch": 0.39, "learning_rate": 1.945049734598206e-05, "loss": 0.5993, "step": 1037 }, { "epoch": 0.39, "learning_rate": 1.944834346539477e-05, "loss": 0.6203, "step": 1038 }, { "epoch": 0.39, "learning_rate": 1.9446185491517304e-05, "loss": 0.5735, "step": 1039 }, { "epoch": 0.39, "learning_rate": 1.9444023425284555e-05, "loss": 0.6205, "step": 1040 }, { "epoch": 0.39, "learning_rate": 1.9441857267633192e-05, "loss": 0.6388, "step": 1041 }, { "epoch": 0.39, "learning_rate": 1.9439687019501657e-05, "loss": 0.5975, "step": 1042 }, { "epoch": 0.39, "learning_rate": 1.9437512681830163e-05, "loss": 0.6301, "step": 1043 }, { "epoch": 0.39, "learning_rate": 1.9435334255560692e-05, "loss": 0.6178, "step": 1044 }, { "epoch": 0.39, "learning_rate": 1.9433151741637e-05, "loss": 0.5757, "step": 1045 }, { "epoch": 0.39, "learning_rate": 1.943096514100462e-05, "loss": 0.665, "step": 1046 }, { "epoch": 0.39, "learning_rate": 1.9428774454610845e-05, "loss": 0.5671, "step": 1047 }, { "epoch": 0.4, "learning_rate": 1.942657968340474e-05, "loss": 0.6118, "step": 1048 }, { "epoch": 0.4, "learning_rate": 1.9424380828337146e-05, "loss": 0.5808, "step": 1049 }, { "epoch": 0.4, "learning_rate": 1.9422177890360667e-05, "loss": 0.6732, "step": 1050 }, { "epoch": 0.4, "learning_rate": 1.9419970870429683e-05, "loss": 0.5967, "step": 1051 }, { "epoch": 0.4, "learning_rate": 1.941775976950033e-05, "loss": 0.5905, "step": 1052 }, { "epoch": 0.4, "learning_rate": 1.9415544588530527e-05, "loss": 0.6532, "step": 1053 }, { "epoch": 0.4, "learning_rate": 1.9413325328479944e-05, "loss": 0.6056, "step": 1054 }, { "epoch": 0.4, "learning_rate": 1.9411101990310038e-05, "loss": 0.6134, "step": 1055 }, { "epoch": 0.4, "learning_rate": 1.9408874574984015e-05, "loss": 0.6061, "step": 1056 }, { "epoch": 0.4, "learning_rate": 1.9406643083466856e-05, "loss": 0.5954, "step": 1057 }, { "epoch": 0.4, "learning_rate": 1.9404407516725307e-05, "loss": 0.5831, "step": 1058 }, { "epoch": 0.4, "learning_rate": 1.940216787572788e-05, "loss": 0.6278, "step": 1059 }, { "epoch": 0.4, "learning_rate": 1.9399924161444852e-05, "loss": 0.6751, "step": 1060 }, { "epoch": 0.4, "learning_rate": 1.9397676374848258e-05, "loss": 0.6489, "step": 1061 }, { "epoch": 0.4, "learning_rate": 1.9395424516911908e-05, "loss": 0.6009, "step": 1062 }, { "epoch": 0.4, "learning_rate": 1.939316858861137e-05, "loss": 0.5963, "step": 1063 }, { "epoch": 0.4, "learning_rate": 1.939090859092397e-05, "loss": 0.5995, "step": 1064 }, { "epoch": 0.4, "learning_rate": 1.9388644524828814e-05, "loss": 0.5991, "step": 1065 }, { "epoch": 0.4, "learning_rate": 1.9386376391306747e-05, "loss": 0.6181, "step": 1066 }, { "epoch": 0.4, "learning_rate": 1.93841041913404e-05, "loss": 0.629, "step": 1067 }, { "epoch": 0.4, "learning_rate": 1.9381827925914146e-05, "loss": 0.6085, "step": 1068 }, { "epoch": 0.4, "learning_rate": 1.9379547596014135e-05, "loss": 0.6452, "step": 1069 }, { "epoch": 0.4, "learning_rate": 1.9377263202628266e-05, "loss": 0.6226, "step": 1070 }, { "epoch": 0.4, "learning_rate": 1.9374974746746207e-05, "loss": 0.5854, "step": 1071 }, { "epoch": 0.4, "learning_rate": 1.9372682229359374e-05, "loss": 0.6055, "step": 1072 }, { "epoch": 0.4, "learning_rate": 1.937038565146096e-05, "loss": 0.6296, "step": 1073 }, { "epoch": 0.4, "learning_rate": 1.93680850140459e-05, "loss": 0.6009, "step": 1074 }, { "epoch": 0.41, "learning_rate": 1.93657803181109e-05, "loss": 0.5922, "step": 1075 }, { "epoch": 0.41, "learning_rate": 1.936347156465442e-05, "loss": 0.6021, "step": 1076 }, { "epoch": 0.41, "learning_rate": 1.936115875467667e-05, "loss": 0.6085, "step": 1077 }, { "epoch": 0.41, "learning_rate": 1.935884188917963e-05, "loss": 0.6227, "step": 1078 }, { "epoch": 0.41, "learning_rate": 1.9356520969167036e-05, "loss": 0.5961, "step": 1079 }, { "epoch": 0.41, "learning_rate": 1.935419599564437e-05, "loss": 0.5599, "step": 1080 }, { "epoch": 0.41, "learning_rate": 1.9351866969618883e-05, "loss": 0.6068, "step": 1081 }, { "epoch": 0.41, "learning_rate": 1.9349533892099564e-05, "loss": 0.6776, "step": 1082 }, { "epoch": 0.41, "learning_rate": 1.9347196764097182e-05, "loss": 0.6287, "step": 1083 }, { "epoch": 0.41, "learning_rate": 1.9344855586624237e-05, "loss": 0.6305, "step": 1084 }, { "epoch": 0.41, "learning_rate": 1.9342510360695e-05, "loss": 0.6333, "step": 1085 }, { "epoch": 0.41, "learning_rate": 1.9340161087325483e-05, "loss": 0.6004, "step": 1086 }, { "epoch": 0.41, "learning_rate": 1.933780776753346e-05, "loss": 0.6291, "step": 1087 }, { "epoch": 0.41, "learning_rate": 1.933545040233846e-05, "loss": 0.5767, "step": 1088 }, { "epoch": 0.41, "learning_rate": 1.9333088992761754e-05, "loss": 0.5904, "step": 1089 }, { "epoch": 0.41, "learning_rate": 1.9330723539826373e-05, "loss": 0.5114, "step": 1090 }, { "epoch": 0.41, "learning_rate": 1.9328354044557105e-05, "loss": 0.6048, "step": 1091 }, { "epoch": 0.41, "learning_rate": 1.9325980507980474e-05, "loss": 0.5718, "step": 1092 }, { "epoch": 0.41, "learning_rate": 1.932360293112477e-05, "loss": 0.6097, "step": 1093 }, { "epoch": 0.41, "learning_rate": 1.932122131502002e-05, "loss": 0.6387, "step": 1094 }, { "epoch": 0.41, "learning_rate": 1.9318835660698016e-05, "loss": 0.6139, "step": 1095 }, { "epoch": 0.41, "learning_rate": 1.931644596919228e-05, "loss": 0.6042, "step": 1096 }, { "epoch": 0.41, "learning_rate": 1.93140522415381e-05, "loss": 0.6539, "step": 1097 }, { "epoch": 0.41, "learning_rate": 1.9311654478772513e-05, "loss": 0.6448, "step": 1098 }, { "epoch": 0.41, "learning_rate": 1.9309252681934286e-05, "loss": 0.6265, "step": 1099 }, { "epoch": 0.41, "learning_rate": 1.9306846852063956e-05, "loss": 0.6143, "step": 1100 }, { "epoch": 0.42, "learning_rate": 1.930443699020379e-05, "loss": 0.6219, "step": 1101 }, { "epoch": 0.42, "learning_rate": 1.930202309739781e-05, "loss": 0.5886, "step": 1102 }, { "epoch": 0.42, "learning_rate": 1.929960517469178e-05, "loss": 0.5646, "step": 1103 }, { "epoch": 0.42, "learning_rate": 1.929718322313322e-05, "loss": 0.638, "step": 1104 }, { "epoch": 0.42, "learning_rate": 1.9294757243771382e-05, "loss": 0.5911, "step": 1105 }, { "epoch": 0.42, "learning_rate": 1.929232723765727e-05, "loss": 0.6469, "step": 1106 }, { "epoch": 0.42, "learning_rate": 1.9289893205843633e-05, "loss": 0.6048, "step": 1107 }, { "epoch": 0.42, "learning_rate": 1.9287455149384965e-05, "loss": 0.6042, "step": 1108 }, { "epoch": 0.42, "learning_rate": 1.9285013069337495e-05, "loss": 0.5989, "step": 1109 }, { "epoch": 0.42, "learning_rate": 1.9282566966759205e-05, "loss": 0.6796, "step": 1110 }, { "epoch": 0.42, "learning_rate": 1.9280116842709812e-05, "loss": 0.6722, "step": 1111 }, { "epoch": 0.42, "learning_rate": 1.9277662698250786e-05, "loss": 0.5762, "step": 1112 }, { "epoch": 0.42, "learning_rate": 1.927520453444533e-05, "loss": 0.61, "step": 1113 }, { "epoch": 0.42, "learning_rate": 1.9272742352358385e-05, "loss": 0.6137, "step": 1114 }, { "epoch": 0.42, "learning_rate": 1.9270276153056643e-05, "loss": 0.6137, "step": 1115 }, { "epoch": 0.42, "learning_rate": 1.926780593760853e-05, "loss": 0.6099, "step": 1116 }, { "epoch": 0.42, "learning_rate": 1.9265331707084214e-05, "loss": 0.5821, "step": 1117 }, { "epoch": 0.42, "learning_rate": 1.92628534625556e-05, "loss": 0.6334, "step": 1118 }, { "epoch": 0.42, "learning_rate": 1.9260371205096336e-05, "loss": 0.5635, "step": 1119 }, { "epoch": 0.42, "learning_rate": 1.9257884935781804e-05, "loss": 0.6361, "step": 1120 }, { "epoch": 0.42, "learning_rate": 1.9255394655689132e-05, "loss": 0.5757, "step": 1121 }, { "epoch": 0.42, "learning_rate": 1.925290036589717e-05, "loss": 0.6325, "step": 1122 }, { "epoch": 0.42, "learning_rate": 1.9250402067486523e-05, "loss": 0.604, "step": 1123 }, { "epoch": 0.42, "learning_rate": 1.9247899761539523e-05, "loss": 0.5643, "step": 1124 }, { "epoch": 0.42, "learning_rate": 1.9245393449140235e-05, "loss": 0.639, "step": 1125 }, { "epoch": 0.42, "learning_rate": 1.9242883131374473e-05, "loss": 0.5972, "step": 1126 }, { "epoch": 0.42, "learning_rate": 1.924036880932977e-05, "loss": 0.6281, "step": 1127 }, { "epoch": 0.43, "learning_rate": 1.923785048409541e-05, "loss": 0.6039, "step": 1128 }, { "epoch": 0.43, "learning_rate": 1.9235328156762395e-05, "loss": 0.6114, "step": 1129 }, { "epoch": 0.43, "learning_rate": 1.9232801828423473e-05, "loss": 0.5847, "step": 1130 }, { "epoch": 0.43, "learning_rate": 1.923027150017312e-05, "loss": 0.5846, "step": 1131 }, { "epoch": 0.43, "learning_rate": 1.9227737173107544e-05, "loss": 0.6206, "step": 1132 }, { "epoch": 0.43, "learning_rate": 1.9225198848324687e-05, "loss": 0.5962, "step": 1133 }, { "epoch": 0.43, "learning_rate": 1.922265652692423e-05, "loss": 0.6177, "step": 1134 }, { "epoch": 0.43, "learning_rate": 1.922011021000757e-05, "loss": 0.5887, "step": 1135 }, { "epoch": 0.43, "learning_rate": 1.921755989867785e-05, "loss": 0.6232, "step": 1136 }, { "epoch": 0.43, "learning_rate": 1.9215005594039932e-05, "loss": 0.651, "step": 1137 }, { "epoch": 0.43, "learning_rate": 1.9212447297200416e-05, "loss": 0.5681, "step": 1138 }, { "epoch": 0.43, "learning_rate": 1.920988500926763e-05, "loss": 0.5776, "step": 1139 }, { "epoch": 0.43, "learning_rate": 1.920731873135163e-05, "loss": 0.6357, "step": 1140 }, { "epoch": 0.43, "learning_rate": 1.9204748464564192e-05, "loss": 0.6211, "step": 1141 }, { "epoch": 0.43, "learning_rate": 1.9202174210018844e-05, "loss": 0.6106, "step": 1142 }, { "epoch": 0.43, "learning_rate": 1.9199595968830808e-05, "loss": 0.5821, "step": 1143 }, { "epoch": 0.43, "learning_rate": 1.9197013742117066e-05, "loss": 0.5922, "step": 1144 }, { "epoch": 0.43, "learning_rate": 1.9194427530996306e-05, "loss": 0.6944, "step": 1145 }, { "epoch": 0.43, "learning_rate": 1.9191837336588944e-05, "loss": 0.619, "step": 1146 }, { "epoch": 0.43, "learning_rate": 1.9189243160017135e-05, "loss": 0.5626, "step": 1147 }, { "epoch": 0.43, "learning_rate": 1.918664500240474e-05, "loss": 0.6215, "step": 1148 }, { "epoch": 0.43, "learning_rate": 1.9184042864877363e-05, "loss": 0.5822, "step": 1149 }, { "epoch": 0.43, "learning_rate": 1.9181436748562315e-05, "loss": 0.6141, "step": 1150 }, { "epoch": 0.43, "learning_rate": 1.9178826654588645e-05, "loss": 0.6098, "step": 1151 }, { "epoch": 0.43, "learning_rate": 1.917621258408712e-05, "loss": 0.6013, "step": 1152 }, { "epoch": 0.43, "learning_rate": 1.917359453819023e-05, "loss": 0.5726, "step": 1153 }, { "epoch": 0.44, "learning_rate": 1.917097251803218e-05, "loss": 0.5975, "step": 1154 }, { "epoch": 0.44, "learning_rate": 1.916834652474891e-05, "loss": 0.5809, "step": 1155 }, { "epoch": 0.44, "learning_rate": 1.9165716559478074e-05, "loss": 0.6445, "step": 1156 }, { "epoch": 0.44, "learning_rate": 1.9163082623359048e-05, "loss": 0.5707, "step": 1157 }, { "epoch": 0.44, "learning_rate": 1.9160444717532923e-05, "loss": 0.5629, "step": 1158 }, { "epoch": 0.44, "learning_rate": 1.9157802843142522e-05, "loss": 0.5712, "step": 1159 }, { "epoch": 0.44, "learning_rate": 1.9155157001332374e-05, "loss": 0.6366, "step": 1160 }, { "epoch": 0.44, "learning_rate": 1.9152507193248733e-05, "loss": 0.6231, "step": 1161 }, { "epoch": 0.44, "learning_rate": 1.9149853420039575e-05, "loss": 0.6354, "step": 1162 }, { "epoch": 0.44, "learning_rate": 1.9147195682854582e-05, "loss": 0.6483, "step": 1163 }, { "epoch": 0.44, "learning_rate": 1.914453398284517e-05, "loss": 0.6096, "step": 1164 }, { "epoch": 0.44, "learning_rate": 1.914186832116446e-05, "loss": 0.6294, "step": 1165 }, { "epoch": 0.44, "learning_rate": 1.913919869896729e-05, "loss": 0.636, "step": 1166 }, { "epoch": 0.44, "learning_rate": 1.9136525117410216e-05, "loss": 0.598, "step": 1167 }, { "epoch": 0.44, "learning_rate": 1.9133847577651515e-05, "loss": 0.6317, "step": 1168 }, { "epoch": 0.44, "learning_rate": 1.9131166080851163e-05, "loss": 0.5948, "step": 1169 }, { "epoch": 0.44, "learning_rate": 1.912848062817087e-05, "loss": 0.5937, "step": 1170 }, { "epoch": 0.44, "learning_rate": 1.9125791220774043e-05, "loss": 0.5689, "step": 1171 }, { "epoch": 0.44, "learning_rate": 1.9123097859825816e-05, "loss": 0.5786, "step": 1172 }, { "epoch": 0.44, "learning_rate": 1.9120400546493024e-05, "loss": 0.5913, "step": 1173 }, { "epoch": 0.44, "learning_rate": 1.9117699281944223e-05, "loss": 0.6044, "step": 1174 }, { "epoch": 0.44, "learning_rate": 1.9114994067349676e-05, "loss": 0.6431, "step": 1175 }, { "epoch": 0.44, "learning_rate": 1.911228490388136e-05, "loss": 0.6083, "step": 1176 }, { "epoch": 0.44, "learning_rate": 1.9109571792712956e-05, "loss": 0.6054, "step": 1177 }, { "epoch": 0.44, "learning_rate": 1.910685473501987e-05, "loss": 0.5706, "step": 1178 }, { "epoch": 0.44, "learning_rate": 1.91041337319792e-05, "loss": 0.6163, "step": 1179 }, { "epoch": 0.44, "learning_rate": 1.9101408784769763e-05, "loss": 0.5418, "step": 1180 }, { "epoch": 0.45, "learning_rate": 1.909867989457209e-05, "loss": 0.6068, "step": 1181 }, { "epoch": 0.45, "learning_rate": 1.9095947062568406e-05, "loss": 0.5985, "step": 1182 }, { "epoch": 0.45, "learning_rate": 1.9093210289942654e-05, "loss": 0.5828, "step": 1183 }, { "epoch": 0.45, "learning_rate": 1.909046957788048e-05, "loss": 0.5875, "step": 1184 }, { "epoch": 0.45, "learning_rate": 1.9087724927569242e-05, "loss": 0.5905, "step": 1185 }, { "epoch": 0.45, "learning_rate": 1.9084976340197995e-05, "loss": 0.5971, "step": 1186 }, { "epoch": 0.45, "learning_rate": 1.9082223816957512e-05, "loss": 0.6049, "step": 1187 }, { "epoch": 0.45, "learning_rate": 1.9079467359040257e-05, "loss": 0.568, "step": 1188 }, { "epoch": 0.45, "learning_rate": 1.907670696764041e-05, "loss": 0.6151, "step": 1189 }, { "epoch": 0.45, "learning_rate": 1.907394264395385e-05, "loss": 0.6306, "step": 1190 }, { "epoch": 0.45, "learning_rate": 1.907117438917816e-05, "loss": 0.5936, "step": 1191 }, { "epoch": 0.45, "learning_rate": 1.9068402204512626e-05, "loss": 0.6159, "step": 1192 }, { "epoch": 0.45, "learning_rate": 1.9065626091158235e-05, "loss": 0.5834, "step": 1193 }, { "epoch": 0.45, "learning_rate": 1.9062846050317683e-05, "loss": 0.5927, "step": 1194 }, { "epoch": 0.45, "learning_rate": 1.9060062083195356e-05, "loss": 0.6657, "step": 1195 }, { "epoch": 0.45, "learning_rate": 1.9057274190997354e-05, "loss": 0.5903, "step": 1196 }, { "epoch": 0.45, "learning_rate": 1.905448237493147e-05, "loss": 0.5685, "step": 1197 }, { "epoch": 0.45, "learning_rate": 1.9051686636207193e-05, "loss": 0.5976, "step": 1198 }, { "epoch": 0.45, "learning_rate": 1.9048886976035718e-05, "loss": 0.5947, "step": 1199 }, { "epoch": 0.45, "learning_rate": 1.904608339562994e-05, "loss": 0.6038, "step": 1200 }, { "epoch": 0.45, "learning_rate": 1.9043275896204446e-05, "loss": 0.6331, "step": 1201 }, { "epoch": 0.45, "learning_rate": 1.9040464478975532e-05, "loss": 0.5784, "step": 1202 }, { "epoch": 0.45, "learning_rate": 1.903764914516117e-05, "loss": 0.6361, "step": 1203 }, { "epoch": 0.45, "learning_rate": 1.9034829895981052e-05, "loss": 0.6139, "step": 1204 }, { "epoch": 0.45, "learning_rate": 1.9032006732656557e-05, "loss": 0.6588, "step": 1205 }, { "epoch": 0.45, "learning_rate": 1.9029179656410757e-05, "loss": 0.6179, "step": 1206 }, { "epoch": 0.46, "learning_rate": 1.9026348668468423e-05, "loss": 0.5898, "step": 1207 }, { "epoch": 0.46, "learning_rate": 1.9023513770056013e-05, "loss": 0.6623, "step": 1208 }, { "epoch": 0.46, "learning_rate": 1.90206749624017e-05, "loss": 0.637, "step": 1209 }, { "epoch": 0.46, "learning_rate": 1.9017832246735317e-05, "loss": 0.6404, "step": 1210 }, { "epoch": 0.46, "learning_rate": 1.901498562428842e-05, "loss": 0.5977, "step": 1211 }, { "epoch": 0.46, "learning_rate": 1.9012135096294246e-05, "loss": 0.6203, "step": 1212 }, { "epoch": 0.46, "learning_rate": 1.9009280663987724e-05, "loss": 0.604, "step": 1213 }, { "epoch": 0.46, "learning_rate": 1.900642232860547e-05, "loss": 0.6145, "step": 1214 }, { "epoch": 0.46, "learning_rate": 1.9003560091385808e-05, "loss": 0.609, "step": 1215 }, { "epoch": 0.46, "learning_rate": 1.9000693953568726e-05, "loss": 0.6504, "step": 1216 }, { "epoch": 0.46, "learning_rate": 1.8997823916395922e-05, "loss": 0.6082, "step": 1217 }, { "epoch": 0.46, "learning_rate": 1.8994949981110783e-05, "loss": 0.6528, "step": 1218 }, { "epoch": 0.46, "learning_rate": 1.8992072148958368e-05, "loss": 0.6092, "step": 1219 }, { "epoch": 0.46, "learning_rate": 1.8989190421185446e-05, "loss": 0.6217, "step": 1220 }, { "epoch": 0.46, "learning_rate": 1.898630479904046e-05, "loss": 0.5745, "step": 1221 }, { "epoch": 0.46, "learning_rate": 1.898341528377354e-05, "loss": 0.5851, "step": 1222 }, { "epoch": 0.46, "learning_rate": 1.8980521876636508e-05, "loss": 0.5885, "step": 1223 }, { "epoch": 0.46, "learning_rate": 1.8977624578882867e-05, "loss": 0.622, "step": 1224 }, { "epoch": 0.46, "learning_rate": 1.8974723391767815e-05, "loss": 0.5669, "step": 1225 }, { "epoch": 0.46, "learning_rate": 1.8971818316548226e-05, "loss": 0.5704, "step": 1226 }, { "epoch": 0.46, "learning_rate": 1.8968909354482657e-05, "loss": 0.5921, "step": 1227 }, { "epoch": 0.46, "learning_rate": 1.8965996506831356e-05, "loss": 0.5767, "step": 1228 }, { "epoch": 0.46, "learning_rate": 1.896307977485625e-05, "loss": 0.5903, "step": 1229 }, { "epoch": 0.46, "learning_rate": 1.8960159159820954e-05, "loss": 0.5799, "step": 1230 }, { "epoch": 0.46, "learning_rate": 1.895723466299076e-05, "loss": 0.5744, "step": 1231 }, { "epoch": 0.46, "learning_rate": 1.8954306285632634e-05, "loss": 0.6307, "step": 1232 }, { "epoch": 0.46, "learning_rate": 1.8951374029015244e-05, "loss": 0.6528, "step": 1233 }, { "epoch": 0.47, "learning_rate": 1.894843789440892e-05, "loss": 0.5971, "step": 1234 }, { "epoch": 0.47, "learning_rate": 1.894549788308568e-05, "loss": 0.5631, "step": 1235 }, { "epoch": 0.47, "learning_rate": 1.8942553996319223e-05, "loss": 0.6006, "step": 1236 }, { "epoch": 0.47, "learning_rate": 1.893960623538492e-05, "loss": 0.6648, "step": 1237 }, { "epoch": 0.47, "learning_rate": 1.8936654601559826e-05, "loss": 0.6469, "step": 1238 }, { "epoch": 0.47, "learning_rate": 1.8933699096122673e-05, "loss": 0.6471, "step": 1239 }, { "epoch": 0.47, "learning_rate": 1.8930739720353873e-05, "loss": 0.6126, "step": 1240 }, { "epoch": 0.47, "learning_rate": 1.89277764755355e-05, "loss": 0.6462, "step": 1241 }, { "epoch": 0.47, "learning_rate": 1.892480936295133e-05, "loss": 0.6135, "step": 1242 }, { "epoch": 0.47, "learning_rate": 1.8921838383886793e-05, "loss": 0.5683, "step": 1243 }, { "epoch": 0.47, "learning_rate": 1.8918863539629005e-05, "loss": 0.6745, "step": 1244 }, { "epoch": 0.47, "learning_rate": 1.8915884831466744e-05, "loss": 0.6274, "step": 1245 }, { "epoch": 0.47, "learning_rate": 1.8912902260690482e-05, "loss": 0.5687, "step": 1246 }, { "epoch": 0.47, "learning_rate": 1.8909915828592343e-05, "loss": 0.643, "step": 1247 }, { "epoch": 0.47, "learning_rate": 1.8906925536466144e-05, "loss": 0.688, "step": 1248 }, { "epoch": 0.47, "learning_rate": 1.890393138560736e-05, "loss": 0.6093, "step": 1249 }, { "epoch": 0.47, "learning_rate": 1.8900933377313138e-05, "loss": 0.6307, "step": 1250 }, { "epoch": 0.47, "learning_rate": 1.8897931512882308e-05, "loss": 0.5589, "step": 1251 }, { "epoch": 0.47, "learning_rate": 1.889492579361535e-05, "loss": 0.6765, "step": 1252 }, { "epoch": 0.47, "learning_rate": 1.889191622081444e-05, "loss": 0.6858, "step": 1253 }, { "epoch": 0.47, "learning_rate": 1.8888902795783405e-05, "loss": 0.6287, "step": 1254 }, { "epoch": 0.47, "learning_rate": 1.888588551982775e-05, "loss": 0.6245, "step": 1255 }, { "epoch": 0.47, "learning_rate": 1.8882864394254636e-05, "loss": 0.7222, "step": 1256 }, { "epoch": 0.47, "learning_rate": 1.8879839420372906e-05, "loss": 0.6252, "step": 1257 }, { "epoch": 0.47, "learning_rate": 1.8876810599493067e-05, "loss": 0.5675, "step": 1258 }, { "epoch": 0.47, "learning_rate": 1.8873777932927285e-05, "loss": 0.6392, "step": 1259 }, { "epoch": 0.48, "learning_rate": 1.88707414219894e-05, "loss": 0.6087, "step": 1260 }, { "epoch": 0.48, "learning_rate": 1.886770106799491e-05, "loss": 0.5363, "step": 1261 }, { "epoch": 0.48, "learning_rate": 1.8864656872260985e-05, "loss": 0.6225, "step": 1262 }, { "epoch": 0.48, "learning_rate": 1.8861608836106464e-05, "loss": 0.6104, "step": 1263 }, { "epoch": 0.48, "learning_rate": 1.8858556960851833e-05, "loss": 0.6324, "step": 1264 }, { "epoch": 0.48, "learning_rate": 1.8855501247819254e-05, "loss": 0.6063, "step": 1265 }, { "epoch": 0.48, "learning_rate": 1.8852441698332547e-05, "loss": 0.6447, "step": 1266 }, { "epoch": 0.48, "learning_rate": 1.8849378313717198e-05, "loss": 0.597, "step": 1267 }, { "epoch": 0.48, "learning_rate": 1.8846311095300357e-05, "loss": 0.6088, "step": 1268 }, { "epoch": 0.48, "learning_rate": 1.8843240044410817e-05, "loss": 0.6265, "step": 1269 }, { "epoch": 0.48, "learning_rate": 1.8840165162379052e-05, "loss": 0.5856, "step": 1270 }, { "epoch": 0.48, "learning_rate": 1.8837086450537195e-05, "loss": 0.5702, "step": 1271 }, { "epoch": 0.48, "learning_rate": 1.8834003910219018e-05, "loss": 0.6074, "step": 1272 }, { "epoch": 0.48, "learning_rate": 1.8830917542759972e-05, "loss": 0.5702, "step": 1273 }, { "epoch": 0.48, "learning_rate": 1.8827827349497158e-05, "loss": 0.5819, "step": 1274 }, { "epoch": 0.48, "learning_rate": 1.882473333176933e-05, "loss": 0.6608, "step": 1275 }, { "epoch": 0.48, "learning_rate": 1.8821635490916917e-05, "loss": 0.5689, "step": 1276 }, { "epoch": 0.48, "learning_rate": 1.881853382828198e-05, "loss": 0.5765, "step": 1277 }, { "epoch": 0.48, "learning_rate": 1.8815428345208247e-05, "loss": 0.6142, "step": 1278 }, { "epoch": 0.48, "learning_rate": 1.881231904304111e-05, "loss": 0.6654, "step": 1279 }, { "epoch": 0.48, "learning_rate": 1.8809205923127602e-05, "loss": 0.6164, "step": 1280 }, { "epoch": 0.48, "learning_rate": 1.880608898681641e-05, "loss": 0.6459, "step": 1281 }, { "epoch": 0.48, "learning_rate": 1.8802968235457885e-05, "loss": 0.5819, "step": 1282 }, { "epoch": 0.48, "learning_rate": 1.8799843670404026e-05, "loss": 0.6594, "step": 1283 }, { "epoch": 0.48, "learning_rate": 1.879671529300848e-05, "loss": 0.6326, "step": 1284 }, { "epoch": 0.48, "learning_rate": 1.8793583104626546e-05, "loss": 0.6061, "step": 1285 }, { "epoch": 0.48, "learning_rate": 1.8790447106615187e-05, "loss": 0.5931, "step": 1286 }, { "epoch": 0.49, "learning_rate": 1.8787307300332992e-05, "loss": 0.5524, "step": 1287 }, { "epoch": 0.49, "learning_rate": 1.8784163687140226e-05, "loss": 0.6253, "step": 1288 }, { "epoch": 0.49, "learning_rate": 1.8781016268398787e-05, "loss": 0.6096, "step": 1289 }, { "epoch": 0.49, "learning_rate": 1.8777865045472228e-05, "loss": 0.6312, "step": 1290 }, { "epoch": 0.49, "learning_rate": 1.8774710019725743e-05, "loss": 0.556, "step": 1291 }, { "epoch": 0.49, "learning_rate": 1.8771551192526182e-05, "loss": 0.6444, "step": 1292 }, { "epoch": 0.49, "learning_rate": 1.876838856524204e-05, "loss": 0.6459, "step": 1293 }, { "epoch": 0.49, "learning_rate": 1.876522213924345e-05, "loss": 0.5893, "step": 1294 }, { "epoch": 0.49, "learning_rate": 1.8762051915902205e-05, "loss": 0.5649, "step": 1295 }, { "epoch": 0.49, "learning_rate": 1.8758877896591735e-05, "loss": 0.6067, "step": 1296 }, { "epoch": 0.49, "learning_rate": 1.875570008268711e-05, "loss": 0.587, "step": 1297 }, { "epoch": 0.49, "learning_rate": 1.8752518475565052e-05, "loss": 0.6247, "step": 1298 }, { "epoch": 0.49, "learning_rate": 1.8749333076603926e-05, "loss": 0.6292, "step": 1299 }, { "epoch": 0.49, "learning_rate": 1.8746143887183733e-05, "loss": 0.6161, "step": 1300 }, { "epoch": 0.49, "learning_rate": 1.8742950908686124e-05, "loss": 0.5563, "step": 1301 }, { "epoch": 0.49, "learning_rate": 1.873975414249438e-05, "loss": 0.6579, "step": 1302 }, { "epoch": 0.49, "learning_rate": 1.873655358999344e-05, "loss": 0.5551, "step": 1303 }, { "epoch": 0.49, "learning_rate": 1.8733349252569873e-05, "loss": 0.6213, "step": 1304 }, { "epoch": 0.49, "learning_rate": 1.8730141131611882e-05, "loss": 0.5773, "step": 1305 }, { "epoch": 0.49, "learning_rate": 1.8726929228509324e-05, "loss": 0.5864, "step": 1306 }, { "epoch": 0.49, "learning_rate": 1.8723713544653678e-05, "loss": 0.5794, "step": 1307 }, { "epoch": 0.49, "learning_rate": 1.872049408143808e-05, "loss": 0.6323, "step": 1308 }, { "epoch": 0.49, "learning_rate": 1.8717270840257282e-05, "loss": 0.6166, "step": 1309 }, { "epoch": 0.49, "learning_rate": 1.871404382250769e-05, "loss": 0.5875, "step": 1310 }, { "epoch": 0.49, "learning_rate": 1.8710813029587335e-05, "loss": 0.597, "step": 1311 }, { "epoch": 0.49, "learning_rate": 1.8707578462895893e-05, "loss": 0.5987, "step": 1312 }, { "epoch": 0.5, "learning_rate": 1.8704340123834668e-05, "loss": 0.5944, "step": 1313 }, { "epoch": 0.5, "learning_rate": 1.8701098013806597e-05, "loss": 0.649, "step": 1314 }, { "epoch": 0.5, "learning_rate": 1.8697852134216258e-05, "loss": 0.5591, "step": 1315 }, { "epoch": 0.5, "learning_rate": 1.869460248646986e-05, "loss": 0.6533, "step": 1316 }, { "epoch": 0.5, "learning_rate": 1.8691349071975238e-05, "loss": 0.6146, "step": 1317 }, { "epoch": 0.5, "learning_rate": 1.8688091892141863e-05, "loss": 0.5701, "step": 1318 }, { "epoch": 0.5, "learning_rate": 1.8684830948380842e-05, "loss": 0.6264, "step": 1319 }, { "epoch": 0.5, "learning_rate": 1.86815662421049e-05, "loss": 0.5835, "step": 1320 }, { "epoch": 0.5, "learning_rate": 1.8678297774728415e-05, "loss": 0.6022, "step": 1321 }, { "epoch": 0.5, "learning_rate": 1.867502554766736e-05, "loss": 0.6019, "step": 1322 }, { "epoch": 0.5, "learning_rate": 1.8671749562339377e-05, "loss": 0.5874, "step": 1323 }, { "epoch": 0.5, "learning_rate": 1.8668469820163698e-05, "loss": 0.6045, "step": 1324 }, { "epoch": 0.5, "learning_rate": 1.8665186322561216e-05, "loss": 0.5626, "step": 1325 }, { "epoch": 0.5, "learning_rate": 1.8661899070954424e-05, "loss": 0.6348, "step": 1326 }, { "epoch": 0.5, "learning_rate": 1.8658608066767453e-05, "loss": 0.5878, "step": 1327 }, { "epoch": 0.5, "learning_rate": 1.8655313311426066e-05, "loss": 0.606, "step": 1328 }, { "epoch": 0.5, "learning_rate": 1.8652014806357638e-05, "loss": 0.5864, "step": 1329 }, { "epoch": 0.5, "learning_rate": 1.8648712552991178e-05, "loss": 0.5567, "step": 1330 }, { "epoch": 0.5, "learning_rate": 1.8645406552757316e-05, "loss": 0.6039, "step": 1331 }, { "epoch": 0.5, "learning_rate": 1.8642096807088305e-05, "loss": 0.6077, "step": 1332 }, { "epoch": 0.5, "learning_rate": 1.863878331741802e-05, "loss": 0.6156, "step": 1333 }, { "epoch": 0.5, "learning_rate": 1.8635466085181955e-05, "loss": 0.58, "step": 1334 }, { "epoch": 0.5, "learning_rate": 1.8632145111817233e-05, "loss": 0.5728, "step": 1335 }, { "epoch": 0.5, "learning_rate": 1.8628820398762598e-05, "loss": 0.5969, "step": 1336 }, { "epoch": 0.5, "learning_rate": 1.8625491947458402e-05, "loss": 0.5809, "step": 1337 }, { "epoch": 0.5, "learning_rate": 1.862215975934663e-05, "loss": 0.6253, "step": 1338 }, { "epoch": 0.5, "learning_rate": 1.8618823835870877e-05, "loss": 0.6172, "step": 1339 }, { "epoch": 0.51, "learning_rate": 1.861548417847636e-05, "loss": 0.5943, "step": 1340 }, { "epoch": 0.51, "learning_rate": 1.861214078860991e-05, "loss": 0.6203, "step": 1341 }, { "epoch": 0.51, "learning_rate": 1.860879366771999e-05, "loss": 0.5938, "step": 1342 }, { "epoch": 0.51, "learning_rate": 1.860544281725665e-05, "loss": 0.5966, "step": 1343 }, { "epoch": 0.51, "learning_rate": 1.860208823867159e-05, "loss": 0.5427, "step": 1344 }, { "epoch": 0.51, "learning_rate": 1.8598729933418102e-05, "loss": 0.6019, "step": 1345 }, { "epoch": 0.51, "learning_rate": 1.85953679029511e-05, "loss": 0.5437, "step": 1346 }, { "epoch": 0.51, "learning_rate": 1.8592002148727103e-05, "loss": 0.5679, "step": 1347 }, { "epoch": 0.51, "learning_rate": 1.8588632672204264e-05, "loss": 0.6117, "step": 1348 }, { "epoch": 0.51, "learning_rate": 1.8585259474842324e-05, "loss": 0.6096, "step": 1349 }, { "epoch": 0.51, "learning_rate": 1.8581882558102657e-05, "loss": 0.5834, "step": 1350 }, { "epoch": 0.51, "learning_rate": 1.8578501923448232e-05, "loss": 0.6489, "step": 1351 }, { "epoch": 0.51, "learning_rate": 1.8575117572343643e-05, "loss": 0.5314, "step": 1352 }, { "epoch": 0.51, "learning_rate": 1.8571729506255082e-05, "loss": 0.5885, "step": 1353 }, { "epoch": 0.51, "learning_rate": 1.8568337726650352e-05, "loss": 0.6239, "step": 1354 }, { "epoch": 0.51, "learning_rate": 1.8564942234998873e-05, "loss": 0.5899, "step": 1355 }, { "epoch": 0.51, "learning_rate": 1.856154303277167e-05, "loss": 0.6016, "step": 1356 }, { "epoch": 0.51, "learning_rate": 1.855814012144137e-05, "loss": 0.6232, "step": 1357 }, { "epoch": 0.51, "learning_rate": 1.8554733502482212e-05, "loss": 0.5998, "step": 1358 }, { "epoch": 0.51, "learning_rate": 1.8551323177370037e-05, "loss": 0.5989, "step": 1359 }, { "epoch": 0.51, "learning_rate": 1.85479091475823e-05, "loss": 0.6309, "step": 1360 }, { "epoch": 0.51, "learning_rate": 1.854449141459805e-05, "loss": 0.642, "step": 1361 }, { "epoch": 0.51, "learning_rate": 1.8541069979897952e-05, "loss": 0.5692, "step": 1362 }, { "epoch": 0.51, "learning_rate": 1.8537644844964263e-05, "loss": 0.616, "step": 1363 }, { "epoch": 0.51, "learning_rate": 1.853421601128085e-05, "loss": 0.6316, "step": 1364 }, { "epoch": 0.51, "learning_rate": 1.853078348033318e-05, "loss": 0.6103, "step": 1365 }, { "epoch": 0.52, "learning_rate": 1.8527347253608322e-05, "loss": 0.6003, "step": 1366 }, { "epoch": 0.52, "learning_rate": 1.8523907332594954e-05, "loss": 0.5876, "step": 1367 }, { "epoch": 0.52, "learning_rate": 1.852046371878334e-05, "loss": 0.5483, "step": 1368 }, { "epoch": 0.52, "learning_rate": 1.851701641366535e-05, "loss": 0.5992, "step": 1369 }, { "epoch": 0.52, "learning_rate": 1.851356541873446e-05, "loss": 0.6386, "step": 1370 }, { "epoch": 0.52, "learning_rate": 1.8510110735485737e-05, "loss": 0.5869, "step": 1371 }, { "epoch": 0.52, "learning_rate": 1.8506652365415843e-05, "loss": 0.5883, "step": 1372 }, { "epoch": 0.52, "learning_rate": 1.8503190310023053e-05, "loss": 0.5717, "step": 1373 }, { "epoch": 0.52, "learning_rate": 1.8499724570807217e-05, "loss": 0.5698, "step": 1374 }, { "epoch": 0.52, "learning_rate": 1.8496255149269792e-05, "loss": 0.5555, "step": 1375 }, { "epoch": 0.52, "learning_rate": 1.8492782046913836e-05, "loss": 0.572, "step": 1376 }, { "epoch": 0.52, "learning_rate": 1.848930526524399e-05, "loss": 0.6284, "step": 1377 }, { "epoch": 0.52, "learning_rate": 1.8485824805766496e-05, "loss": 0.614, "step": 1378 }, { "epoch": 0.52, "learning_rate": 1.8482340669989187e-05, "loss": 0.5684, "step": 1379 }, { "epoch": 0.52, "learning_rate": 1.8478852859421494e-05, "loss": 0.5816, "step": 1380 }, { "epoch": 0.52, "learning_rate": 1.847536137557443e-05, "loss": 0.5716, "step": 1381 }, { "epoch": 0.52, "learning_rate": 1.8471866219960604e-05, "loss": 0.6795, "step": 1382 }, { "epoch": 0.52, "learning_rate": 1.8468367394094222e-05, "loss": 0.5756, "step": 1383 }, { "epoch": 0.52, "learning_rate": 1.846486489949107e-05, "loss": 0.5432, "step": 1384 }, { "epoch": 0.52, "learning_rate": 1.846135873766853e-05, "loss": 0.6224, "step": 1385 }, { "epoch": 0.52, "learning_rate": 1.845784891014557e-05, "loss": 0.6029, "step": 1386 }, { "epoch": 0.52, "learning_rate": 1.845433541844275e-05, "loss": 0.5925, "step": 1387 }, { "epoch": 0.52, "learning_rate": 1.8450818264082205e-05, "loss": 0.6292, "step": 1388 }, { "epoch": 0.52, "learning_rate": 1.8447297448587677e-05, "loss": 0.6633, "step": 1389 }, { "epoch": 0.52, "learning_rate": 1.8443772973484474e-05, "loss": 0.6529, "step": 1390 }, { "epoch": 0.52, "learning_rate": 1.8440244840299507e-05, "loss": 0.5893, "step": 1391 }, { "epoch": 0.52, "learning_rate": 1.8436713050561255e-05, "loss": 0.6042, "step": 1392 }, { "epoch": 0.53, "learning_rate": 1.8433177605799796e-05, "loss": 0.6436, "step": 1393 }, { "epoch": 0.53, "learning_rate": 1.8429638507546778e-05, "loss": 0.6128, "step": 1394 }, { "epoch": 0.53, "learning_rate": 1.8426095757335444e-05, "loss": 0.5941, "step": 1395 }, { "epoch": 0.53, "learning_rate": 1.8422549356700617e-05, "loss": 0.5908, "step": 1396 }, { "epoch": 0.53, "learning_rate": 1.8418999307178692e-05, "loss": 0.5886, "step": 1397 }, { "epoch": 0.53, "learning_rate": 1.8415445610307648e-05, "loss": 0.5973, "step": 1398 }, { "epoch": 0.53, "learning_rate": 1.841188826762706e-05, "loss": 0.6082, "step": 1399 }, { "epoch": 0.53, "learning_rate": 1.8408327280678057e-05, "loss": 0.6323, "step": 1400 }, { "epoch": 0.53, "learning_rate": 1.8404762651003367e-05, "loss": 0.5917, "step": 1401 }, { "epoch": 0.53, "learning_rate": 1.8401194380147282e-05, "loss": 0.6063, "step": 1402 }, { "epoch": 0.53, "learning_rate": 1.8397622469655685e-05, "loss": 0.5412, "step": 1403 }, { "epoch": 0.53, "learning_rate": 1.8394046921076027e-05, "loss": 0.5511, "step": 1404 }, { "epoch": 0.53, "learning_rate": 1.8390467735957332e-05, "loss": 0.6041, "step": 1405 }, { "epoch": 0.53, "learning_rate": 1.8386884915850208e-05, "loss": 0.6786, "step": 1406 }, { "epoch": 0.53, "learning_rate": 1.8383298462306837e-05, "loss": 0.6082, "step": 1407 }, { "epoch": 0.53, "learning_rate": 1.837970837688097e-05, "loss": 0.6121, "step": 1408 }, { "epoch": 0.53, "learning_rate": 1.837611466112793e-05, "loss": 0.6156, "step": 1409 }, { "epoch": 0.53, "learning_rate": 1.8372517316604626e-05, "loss": 0.6114, "step": 1410 }, { "epoch": 0.53, "learning_rate": 1.8368916344869522e-05, "loss": 0.608, "step": 1411 }, { "epoch": 0.53, "learning_rate": 1.8365311747482658e-05, "loss": 0.6229, "step": 1412 }, { "epoch": 0.53, "learning_rate": 1.8361703526005657e-05, "loss": 0.5828, "step": 1413 }, { "epoch": 0.53, "learning_rate": 1.8358091682001698e-05, "loss": 0.5962, "step": 1414 }, { "epoch": 0.53, "learning_rate": 1.835447621703553e-05, "loss": 0.6202, "step": 1415 }, { "epoch": 0.53, "learning_rate": 1.8350857132673484e-05, "loss": 0.5682, "step": 1416 }, { "epoch": 0.53, "learning_rate": 1.8347234430483444e-05, "loss": 0.6188, "step": 1417 }, { "epoch": 0.53, "learning_rate": 1.834360811203487e-05, "loss": 0.6323, "step": 1418 }, { "epoch": 0.54, "learning_rate": 1.833997817889878e-05, "loss": 0.5683, "step": 1419 }, { "epoch": 0.54, "learning_rate": 1.8336344632647773e-05, "loss": 0.5847, "step": 1420 }, { "epoch": 0.54, "learning_rate": 1.8332707474855995e-05, "loss": 0.5868, "step": 1421 }, { "epoch": 0.54, "learning_rate": 1.8329066707099175e-05, "loss": 0.6102, "step": 1422 }, { "epoch": 0.54, "learning_rate": 1.8325422330954593e-05, "loss": 0.6634, "step": 1423 }, { "epoch": 0.54, "learning_rate": 1.832177434800109e-05, "loss": 0.5696, "step": 1424 }, { "epoch": 0.54, "learning_rate": 1.8318122759819084e-05, "loss": 0.5932, "step": 1425 }, { "epoch": 0.54, "learning_rate": 1.8314467567990542e-05, "loss": 0.5996, "step": 1426 }, { "epoch": 0.54, "learning_rate": 1.8310808774099002e-05, "loss": 0.5369, "step": 1427 }, { "epoch": 0.54, "learning_rate": 1.8307146379729545e-05, "loss": 0.623, "step": 1428 }, { "epoch": 0.54, "learning_rate": 1.830348038646884e-05, "loss": 0.578, "step": 1429 }, { "epoch": 0.54, "learning_rate": 1.8299810795905088e-05, "loss": 0.6253, "step": 1430 }, { "epoch": 0.54, "learning_rate": 1.8296137609628064e-05, "loss": 0.6416, "step": 1431 }, { "epoch": 0.54, "learning_rate": 1.8292460829229096e-05, "loss": 0.5874, "step": 1432 }, { "epoch": 0.54, "learning_rate": 1.828878045630107e-05, "loss": 0.57, "step": 1433 }, { "epoch": 0.54, "learning_rate": 1.8285096492438424e-05, "loss": 0.5238, "step": 1434 }, { "epoch": 0.54, "learning_rate": 1.828140893923716e-05, "loss": 0.5933, "step": 1435 }, { "epoch": 0.54, "learning_rate": 1.8277717798294827e-05, "loss": 0.5962, "step": 1436 }, { "epoch": 0.54, "learning_rate": 1.8274023071210535e-05, "loss": 0.6007, "step": 1437 }, { "epoch": 0.54, "learning_rate": 1.8270324759584946e-05, "loss": 0.5553, "step": 1438 }, { "epoch": 0.54, "learning_rate": 1.8266622865020267e-05, "loss": 0.5675, "step": 1439 }, { "epoch": 0.54, "learning_rate": 1.826291738912027e-05, "loss": 0.6038, "step": 1440 }, { "epoch": 0.54, "learning_rate": 1.825920833349027e-05, "loss": 0.637, "step": 1441 }, { "epoch": 0.54, "learning_rate": 1.8255495699737128e-05, "loss": 0.5922, "step": 1442 }, { "epoch": 0.54, "learning_rate": 1.8251779489469277e-05, "loss": 0.6027, "step": 1443 }, { "epoch": 0.54, "learning_rate": 1.824805970429667e-05, "loss": 0.574, "step": 1444 }, { "epoch": 0.54, "learning_rate": 1.8244336345830836e-05, "loss": 0.6811, "step": 1445 }, { "epoch": 0.55, "learning_rate": 1.8240609415684828e-05, "loss": 0.5833, "step": 1446 }, { "epoch": 0.55, "learning_rate": 1.8236878915473265e-05, "loss": 0.6071, "step": 1447 }, { "epoch": 0.55, "learning_rate": 1.8233144846812306e-05, "loss": 0.5561, "step": 1448 }, { "epoch": 0.55, "learning_rate": 1.822940721131965e-05, "loss": 0.5778, "step": 1449 }, { "epoch": 0.55, "learning_rate": 1.8225666010614547e-05, "loss": 0.6018, "step": 1450 }, { "epoch": 0.55, "learning_rate": 1.8221921246317798e-05, "loss": 0.5662, "step": 1451 }, { "epoch": 0.55, "learning_rate": 1.8218172920051732e-05, "loss": 0.5504, "step": 1452 }, { "epoch": 0.55, "learning_rate": 1.8214421033440238e-05, "loss": 0.553, "step": 1453 }, { "epoch": 0.55, "learning_rate": 1.8210665588108737e-05, "loss": 0.6243, "step": 1454 }, { "epoch": 0.55, "learning_rate": 1.820690658568419e-05, "loss": 0.6503, "step": 1455 }, { "epoch": 0.55, "learning_rate": 1.820314402779511e-05, "loss": 0.5855, "step": 1456 }, { "epoch": 0.55, "learning_rate": 1.819937791607154e-05, "loss": 0.637, "step": 1457 }, { "epoch": 0.55, "learning_rate": 1.8195608252145067e-05, "loss": 0.593, "step": 1458 }, { "epoch": 0.55, "learning_rate": 1.819183503764882e-05, "loss": 0.6311, "step": 1459 }, { "epoch": 0.55, "learning_rate": 1.818805827421745e-05, "loss": 0.5945, "step": 1460 }, { "epoch": 0.55, "learning_rate": 1.8184277963487176e-05, "loss": 0.6141, "step": 1461 }, { "epoch": 0.55, "learning_rate": 1.8180494107095725e-05, "loss": 0.5561, "step": 1462 }, { "epoch": 0.55, "learning_rate": 1.8176706706682368e-05, "loss": 0.5812, "step": 1463 }, { "epoch": 0.55, "learning_rate": 1.8172915763887923e-05, "loss": 0.7004, "step": 1464 }, { "epoch": 0.55, "learning_rate": 1.816912128035473e-05, "loss": 0.591, "step": 1465 }, { "epoch": 0.55, "learning_rate": 1.816532325772666e-05, "loss": 0.6045, "step": 1466 }, { "epoch": 0.55, "learning_rate": 1.8161521697649135e-05, "loss": 0.6005, "step": 1467 }, { "epoch": 0.55, "learning_rate": 1.8157716601769093e-05, "loss": 0.6148, "step": 1468 }, { "epoch": 0.55, "learning_rate": 1.815390797173501e-05, "loss": 0.6196, "step": 1469 }, { "epoch": 0.55, "learning_rate": 1.8150095809196894e-05, "loss": 0.6198, "step": 1470 }, { "epoch": 0.55, "learning_rate": 1.814628011580628e-05, "loss": 0.6105, "step": 1471 }, { "epoch": 0.56, "learning_rate": 1.8142460893216235e-05, "loss": 0.5707, "step": 1472 }, { "epoch": 0.56, "learning_rate": 1.8138638143081354e-05, "loss": 0.6356, "step": 1473 }, { "epoch": 0.56, "learning_rate": 1.8134811867057763e-05, "loss": 0.594, "step": 1474 }, { "epoch": 0.56, "learning_rate": 1.813098206680311e-05, "loss": 0.5865, "step": 1475 }, { "epoch": 0.56, "learning_rate": 1.8127148743976576e-05, "loss": 0.6198, "step": 1476 }, { "epoch": 0.56, "learning_rate": 1.812331190023886e-05, "loss": 0.5781, "step": 1477 }, { "epoch": 0.56, "learning_rate": 1.8119471537252198e-05, "loss": 0.5802, "step": 1478 }, { "epoch": 0.56, "learning_rate": 1.811562765668034e-05, "loss": 0.5427, "step": 1479 }, { "epoch": 0.56, "learning_rate": 1.811178026018857e-05, "loss": 0.6351, "step": 1480 }, { "epoch": 0.56, "learning_rate": 1.810792934944368e-05, "loss": 0.6286, "step": 1481 }, { "epoch": 0.56, "learning_rate": 1.8104074926113994e-05, "loss": 0.5805, "step": 1482 }, { "epoch": 0.56, "learning_rate": 1.8100216991869368e-05, "loss": 0.5682, "step": 1483 }, { "epoch": 0.56, "learning_rate": 1.809635554838116e-05, "loss": 0.6595, "step": 1484 }, { "epoch": 0.56, "learning_rate": 1.809249059732226e-05, "loss": 0.6013, "step": 1485 }, { "epoch": 0.56, "learning_rate": 1.8088622140367073e-05, "loss": 0.6178, "step": 1486 }, { "epoch": 0.56, "learning_rate": 1.808475017919152e-05, "loss": 0.5931, "step": 1487 }, { "epoch": 0.56, "learning_rate": 1.8080874715473054e-05, "loss": 0.6106, "step": 1488 }, { "epoch": 0.56, "learning_rate": 1.8076995750890628e-05, "loss": 0.6705, "step": 1489 }, { "epoch": 0.56, "learning_rate": 1.807311328712472e-05, "loss": 0.5624, "step": 1490 }, { "epoch": 0.56, "learning_rate": 1.806922732585733e-05, "loss": 0.6063, "step": 1491 }, { "epoch": 0.56, "learning_rate": 1.8065337868771956e-05, "loss": 0.5555, "step": 1492 }, { "epoch": 0.56, "learning_rate": 1.806144491755363e-05, "loss": 0.6341, "step": 1493 }, { "epoch": 0.56, "learning_rate": 1.8057548473888885e-05, "loss": 0.5479, "step": 1494 }, { "epoch": 0.56, "learning_rate": 1.805364853946577e-05, "loss": 0.5834, "step": 1495 }, { "epoch": 0.56, "learning_rate": 1.8049745115973848e-05, "loss": 0.6099, "step": 1496 }, { "epoch": 0.56, "learning_rate": 1.8045838205104193e-05, "loss": 0.6069, "step": 1497 }, { "epoch": 0.56, "learning_rate": 1.8041927808549392e-05, "loss": 0.5973, "step": 1498 }, { "epoch": 0.57, "learning_rate": 1.803801392800354e-05, "loss": 0.5795, "step": 1499 }, { "epoch": 0.57, "learning_rate": 1.8034096565162232e-05, "loss": 0.569, "step": 1500 }, { "epoch": 0.57, "learning_rate": 1.803017572172259e-05, "loss": 0.6173, "step": 1501 }, { "epoch": 0.57, "learning_rate": 1.802625139938323e-05, "loss": 0.5773, "step": 1502 }, { "epoch": 0.57, "learning_rate": 1.8022323599844283e-05, "loss": 0.5696, "step": 1503 }, { "epoch": 0.57, "learning_rate": 1.8018392324807376e-05, "loss": 0.5876, "step": 1504 }, { "epoch": 0.57, "learning_rate": 1.801445757597566e-05, "loss": 0.5757, "step": 1505 }, { "epoch": 0.57, "learning_rate": 1.801051935505377e-05, "loss": 0.6194, "step": 1506 }, { "epoch": 0.57, "learning_rate": 1.800657766374786e-05, "loss": 0.605, "step": 1507 }, { "epoch": 0.57, "learning_rate": 1.800263250376558e-05, "loss": 0.5768, "step": 1508 }, { "epoch": 0.57, "learning_rate": 1.7998683876816087e-05, "loss": 0.6016, "step": 1509 }, { "epoch": 0.57, "learning_rate": 1.7994731784610035e-05, "loss": 0.6055, "step": 1510 }, { "epoch": 0.57, "learning_rate": 1.7990776228859586e-05, "loss": 0.5221, "step": 1511 }, { "epoch": 0.57, "learning_rate": 1.7986817211278398e-05, "loss": 0.6073, "step": 1512 }, { "epoch": 0.57, "learning_rate": 1.798285473358163e-05, "loss": 0.6537, "step": 1513 }, { "epoch": 0.57, "learning_rate": 1.7978888797485937e-05, "loss": 0.5401, "step": 1514 }, { "epoch": 0.57, "learning_rate": 1.7974919404709474e-05, "loss": 0.5919, "step": 1515 }, { "epoch": 0.57, "learning_rate": 1.79709465569719e-05, "loss": 0.6125, "step": 1516 }, { "epoch": 0.57, "learning_rate": 1.796697025599436e-05, "loss": 0.5785, "step": 1517 }, { "epoch": 0.57, "learning_rate": 1.7962990503499504e-05, "loss": 0.6195, "step": 1518 }, { "epoch": 0.57, "learning_rate": 1.7959007301211473e-05, "loss": 0.6291, "step": 1519 }, { "epoch": 0.57, "learning_rate": 1.79550206508559e-05, "loss": 0.6572, "step": 1520 }, { "epoch": 0.57, "learning_rate": 1.7951030554159922e-05, "loss": 0.5796, "step": 1521 }, { "epoch": 0.57, "learning_rate": 1.794703701285215e-05, "loss": 0.6062, "step": 1522 }, { "epoch": 0.57, "learning_rate": 1.794304002866271e-05, "loss": 0.6254, "step": 1523 }, { "epoch": 0.57, "learning_rate": 1.7939039603323204e-05, "loss": 0.6067, "step": 1524 }, { "epoch": 0.58, "learning_rate": 1.7935035738566732e-05, "loss": 0.6138, "step": 1525 }, { "epoch": 0.58, "learning_rate": 1.793102843612788e-05, "loss": 0.6111, "step": 1526 }, { "epoch": 0.58, "learning_rate": 1.7927017697742726e-05, "loss": 0.6225, "step": 1527 }, { "epoch": 0.58, "learning_rate": 1.7923003525148836e-05, "loss": 0.642, "step": 1528 }, { "epoch": 0.58, "learning_rate": 1.791898592008526e-05, "loss": 0.6406, "step": 1529 }, { "epoch": 0.58, "learning_rate": 1.7914964884292543e-05, "loss": 0.5341, "step": 1530 }, { "epoch": 0.58, "learning_rate": 1.791094041951271e-05, "loss": 0.6431, "step": 1531 }, { "epoch": 0.58, "learning_rate": 1.790691252748927e-05, "loss": 0.6345, "step": 1532 }, { "epoch": 0.58, "learning_rate": 1.7902881209967222e-05, "loss": 0.6093, "step": 1533 }, { "epoch": 0.58, "learning_rate": 1.7898846468693052e-05, "loss": 0.5719, "step": 1534 }, { "epoch": 0.58, "learning_rate": 1.7894808305414716e-05, "loss": 0.5683, "step": 1535 }, { "epoch": 0.58, "learning_rate": 1.7890766721881666e-05, "loss": 0.5835, "step": 1536 }, { "epoch": 0.58, "learning_rate": 1.7886721719844828e-05, "loss": 0.6052, "step": 1537 }, { "epoch": 0.58, "learning_rate": 1.7882673301056614e-05, "loss": 0.585, "step": 1538 }, { "epoch": 0.58, "learning_rate": 1.7878621467270912e-05, "loss": 0.6085, "step": 1539 }, { "epoch": 0.58, "learning_rate": 1.787456622024309e-05, "loss": 0.5513, "step": 1540 }, { "epoch": 0.58, "learning_rate": 1.7870507561730004e-05, "loss": 0.5734, "step": 1541 }, { "epoch": 0.58, "learning_rate": 1.786644549348997e-05, "loss": 0.5409, "step": 1542 }, { "epoch": 0.58, "learning_rate": 1.7862380017282795e-05, "loss": 0.5847, "step": 1543 }, { "epoch": 0.58, "learning_rate": 1.7858311134869758e-05, "loss": 0.6344, "step": 1544 }, { "epoch": 0.58, "learning_rate": 1.785423884801361e-05, "loss": 0.58, "step": 1545 }, { "epoch": 0.58, "learning_rate": 1.7850163158478593e-05, "loss": 0.6082, "step": 1546 }, { "epoch": 0.58, "learning_rate": 1.7846084068030398e-05, "loss": 0.6238, "step": 1547 }, { "epoch": 0.58, "learning_rate": 1.7842001578436214e-05, "loss": 0.5736, "step": 1548 }, { "epoch": 0.58, "learning_rate": 1.783791569146468e-05, "loss": 0.5852, "step": 1549 }, { "epoch": 0.58, "learning_rate": 1.7833826408885927e-05, "loss": 0.6385, "step": 1550 }, { "epoch": 0.58, "learning_rate": 1.782973373247154e-05, "loss": 0.56, "step": 1551 }, { "epoch": 0.59, "learning_rate": 1.782563766399459e-05, "loss": 0.5673, "step": 1552 }, { "epoch": 0.59, "learning_rate": 1.7821538205229608e-05, "loss": 0.6064, "step": 1553 }, { "epoch": 0.59, "learning_rate": 1.7817435357952595e-05, "loss": 0.64, "step": 1554 }, { "epoch": 0.59, "learning_rate": 1.7813329123941022e-05, "loss": 0.6085, "step": 1555 }, { "epoch": 0.59, "learning_rate": 1.780921950497382e-05, "loss": 0.5545, "step": 1556 }, { "epoch": 0.59, "learning_rate": 1.78051065028314e-05, "loss": 0.625, "step": 1557 }, { "epoch": 0.59, "learning_rate": 1.7800990119295626e-05, "loss": 0.5906, "step": 1558 }, { "epoch": 0.59, "learning_rate": 1.7796870356149833e-05, "loss": 0.6196, "step": 1559 }, { "epoch": 0.59, "learning_rate": 1.7792747215178822e-05, "loss": 0.5973, "step": 1560 }, { "epoch": 0.59, "learning_rate": 1.7788620698168854e-05, "loss": 0.6138, "step": 1561 }, { "epoch": 0.59, "learning_rate": 1.778449080690765e-05, "loss": 0.5729, "step": 1562 }, { "epoch": 0.59, "learning_rate": 1.7780357543184396e-05, "loss": 0.6436, "step": 1563 }, { "epoch": 0.59, "learning_rate": 1.7776220908789742e-05, "loss": 0.5796, "step": 1564 }, { "epoch": 0.59, "learning_rate": 1.7772080905515796e-05, "loss": 0.5976, "step": 1565 }, { "epoch": 0.59, "learning_rate": 1.7767937535156122e-05, "loss": 0.6299, "step": 1566 }, { "epoch": 0.59, "learning_rate": 1.7763790799505746e-05, "loss": 0.584, "step": 1567 }, { "epoch": 0.59, "learning_rate": 1.775964070036115e-05, "loss": 0.6026, "step": 1568 }, { "epoch": 0.59, "learning_rate": 1.7755487239520278e-05, "loss": 0.6463, "step": 1569 }, { "epoch": 0.59, "learning_rate": 1.7751330418782524e-05, "loss": 0.6074, "step": 1570 }, { "epoch": 0.59, "learning_rate": 1.774717023994874e-05, "loss": 0.526, "step": 1571 }, { "epoch": 0.59, "learning_rate": 1.774300670482124e-05, "loss": 0.5668, "step": 1572 }, { "epoch": 0.59, "learning_rate": 1.7738839815203777e-05, "loss": 0.5748, "step": 1573 }, { "epoch": 0.59, "learning_rate": 1.773466957290157e-05, "loss": 0.5927, "step": 1574 }, { "epoch": 0.59, "learning_rate": 1.7730495979721284e-05, "loss": 0.6001, "step": 1575 }, { "epoch": 0.59, "learning_rate": 1.772631903747104e-05, "loss": 0.6472, "step": 1576 }, { "epoch": 0.59, "learning_rate": 1.7722138747960408e-05, "loss": 0.5679, "step": 1577 }, { "epoch": 0.6, "learning_rate": 1.7717955113000408e-05, "loss": 0.597, "step": 1578 }, { "epoch": 0.6, "learning_rate": 1.7713768134403504e-05, "loss": 0.5897, "step": 1579 }, { "epoch": 0.6, "learning_rate": 1.7709577813983623e-05, "loss": 0.6023, "step": 1580 }, { "epoch": 0.6, "learning_rate": 1.770538415355612e-05, "loss": 0.628, "step": 1581 }, { "epoch": 0.6, "learning_rate": 1.7701187154937817e-05, "loss": 0.5544, "step": 1582 }, { "epoch": 0.6, "learning_rate": 1.769698681994697e-05, "loss": 0.6226, "step": 1583 }, { "epoch": 0.6, "learning_rate": 1.7692783150403278e-05, "loss": 0.6079, "step": 1584 }, { "epoch": 0.6, "learning_rate": 1.7688576148127895e-05, "loss": 0.5989, "step": 1585 }, { "epoch": 0.6, "learning_rate": 1.7684365814943413e-05, "loss": 0.6044, "step": 1586 }, { "epoch": 0.6, "learning_rate": 1.7680152152673863e-05, "loss": 0.5927, "step": 1587 }, { "epoch": 0.6, "learning_rate": 1.7675935163144733e-05, "loss": 0.5578, "step": 1588 }, { "epoch": 0.6, "learning_rate": 1.7671714848182934e-05, "loss": 0.5759, "step": 1589 }, { "epoch": 0.6, "learning_rate": 1.7667491209616828e-05, "loss": 0.6471, "step": 1590 }, { "epoch": 0.6, "learning_rate": 1.7663264249276215e-05, "loss": 0.596, "step": 1591 }, { "epoch": 0.6, "learning_rate": 1.7659033968992334e-05, "loss": 0.539, "step": 1592 }, { "epoch": 0.6, "learning_rate": 1.7654800370597866e-05, "loss": 0.6375, "step": 1593 }, { "epoch": 0.6, "learning_rate": 1.7650563455926918e-05, "loss": 0.6145, "step": 1594 }, { "epoch": 0.6, "learning_rate": 1.764632322681505e-05, "loss": 0.6131, "step": 1595 }, { "epoch": 0.6, "learning_rate": 1.7642079685099243e-05, "loss": 0.5789, "step": 1596 }, { "epoch": 0.6, "learning_rate": 1.763783283261792e-05, "loss": 0.6023, "step": 1597 }, { "epoch": 0.6, "learning_rate": 1.763358267121094e-05, "loss": 0.6426, "step": 1598 }, { "epoch": 0.6, "learning_rate": 1.76293292027196e-05, "loss": 0.5607, "step": 1599 }, { "epoch": 0.6, "learning_rate": 1.762507242898661e-05, "loss": 0.6161, "step": 1600 }, { "epoch": 0.6, "learning_rate": 1.7620812351856133e-05, "loss": 0.5777, "step": 1601 }, { "epoch": 0.6, "learning_rate": 1.7616548973173752e-05, "loss": 0.5403, "step": 1602 }, { "epoch": 0.6, "learning_rate": 1.7612282294786488e-05, "loss": 0.6411, "step": 1603 }, { "epoch": 0.6, "learning_rate": 1.760801231854278e-05, "loss": 0.6117, "step": 1604 }, { "epoch": 0.61, "learning_rate": 1.760373904629251e-05, "loss": 0.5622, "step": 1605 }, { "epoch": 0.61, "learning_rate": 1.7599462479886976e-05, "loss": 0.5963, "step": 1606 }, { "epoch": 0.61, "learning_rate": 1.759518262117891e-05, "loss": 0.6368, "step": 1607 }, { "epoch": 0.61, "learning_rate": 1.7590899472022468e-05, "loss": 0.6272, "step": 1608 }, { "epoch": 0.61, "learning_rate": 1.7586613034273227e-05, "loss": 0.6048, "step": 1609 }, { "epoch": 0.61, "learning_rate": 1.75823233097882e-05, "loss": 0.6086, "step": 1610 }, { "epoch": 0.61, "learning_rate": 1.7578030300425814e-05, "loss": 0.5624, "step": 1611 }, { "epoch": 0.61, "learning_rate": 1.7573734008045922e-05, "loss": 0.5891, "step": 1612 }, { "epoch": 0.61, "learning_rate": 1.7569434434509804e-05, "loss": 0.6, "step": 1613 }, { "epoch": 0.61, "learning_rate": 1.7565131581680148e-05, "loss": 0.6083, "step": 1614 }, { "epoch": 0.61, "learning_rate": 1.756082545142108e-05, "loss": 0.6147, "step": 1615 }, { "epoch": 0.61, "learning_rate": 1.7556516045598136e-05, "loss": 0.5511, "step": 1616 }, { "epoch": 0.61, "learning_rate": 1.7552203366078268e-05, "loss": 0.6194, "step": 1617 }, { "epoch": 0.61, "learning_rate": 1.7547887414729858e-05, "loss": 0.5969, "step": 1618 }, { "epoch": 0.61, "learning_rate": 1.7543568193422692e-05, "loss": 0.5841, "step": 1619 }, { "epoch": 0.61, "learning_rate": 1.7539245704027985e-05, "loss": 0.6003, "step": 1620 }, { "epoch": 0.61, "learning_rate": 1.7534919948418362e-05, "loss": 0.5639, "step": 1621 }, { "epoch": 0.61, "learning_rate": 1.7530590928467854e-05, "loss": 0.6217, "step": 1622 }, { "epoch": 0.61, "learning_rate": 1.7526258646051924e-05, "loss": 0.6145, "step": 1623 }, { "epoch": 0.61, "learning_rate": 1.7521923103047435e-05, "loss": 0.5825, "step": 1624 }, { "epoch": 0.61, "learning_rate": 1.751758430133267e-05, "loss": 0.5097, "step": 1625 }, { "epoch": 0.61, "learning_rate": 1.7513242242787322e-05, "loss": 0.6344, "step": 1626 }, { "epoch": 0.61, "learning_rate": 1.7508896929292488e-05, "loss": 0.5436, "step": 1627 }, { "epoch": 0.61, "learning_rate": 1.7504548362730685e-05, "loss": 0.6473, "step": 1628 }, { "epoch": 0.61, "learning_rate": 1.7500196544985834e-05, "loss": 0.5572, "step": 1629 }, { "epoch": 0.61, "learning_rate": 1.749584147794327e-05, "loss": 0.5939, "step": 1630 }, { "epoch": 0.62, "learning_rate": 1.7491483163489726e-05, "loss": 0.6239, "step": 1631 }, { "epoch": 0.62, "learning_rate": 1.7487121603513348e-05, "loss": 0.5865, "step": 1632 }, { "epoch": 0.62, "learning_rate": 1.748275679990369e-05, "loss": 0.5732, "step": 1633 }, { "epoch": 0.62, "learning_rate": 1.747838875455171e-05, "loss": 0.5508, "step": 1634 }, { "epoch": 0.62, "learning_rate": 1.747401746934976e-05, "loss": 0.6448, "step": 1635 }, { "epoch": 0.62, "learning_rate": 1.746964294619162e-05, "loss": 0.6586, "step": 1636 }, { "epoch": 0.62, "learning_rate": 1.746526518697244e-05, "loss": 0.6109, "step": 1637 }, { "epoch": 0.62, "learning_rate": 1.7460884193588804e-05, "loss": 0.6196, "step": 1638 }, { "epoch": 0.62, "learning_rate": 1.7456499967938673e-05, "loss": 0.6462, "step": 1639 }, { "epoch": 0.62, "learning_rate": 1.7452112511921422e-05, "loss": 0.6296, "step": 1640 }, { "epoch": 0.62, "learning_rate": 1.744772182743782e-05, "loss": 0.5905, "step": 1641 }, { "epoch": 0.62, "learning_rate": 1.7443327916390038e-05, "loss": 0.5934, "step": 1642 }, { "epoch": 0.62, "learning_rate": 1.743893078068164e-05, "loss": 0.6263, "step": 1643 }, { "epoch": 0.62, "learning_rate": 1.7434530422217592e-05, "loss": 0.6253, "step": 1644 }, { "epoch": 0.62, "learning_rate": 1.743012684290425e-05, "loss": 0.588, "step": 1645 }, { "epoch": 0.62, "learning_rate": 1.7425720044649376e-05, "loss": 0.5777, "step": 1646 }, { "epoch": 0.62, "learning_rate": 1.7421310029362115e-05, "loss": 0.5665, "step": 1647 }, { "epoch": 0.62, "learning_rate": 1.741689679895301e-05, "loss": 0.5839, "step": 1648 }, { "epoch": 0.62, "learning_rate": 1.7412480355334006e-05, "loss": 0.6166, "step": 1649 }, { "epoch": 0.62, "learning_rate": 1.740806070041842e-05, "loss": 0.5851, "step": 1650 }, { "epoch": 0.62, "learning_rate": 1.7403637836120976e-05, "loss": 0.6449, "step": 1651 }, { "epoch": 0.62, "learning_rate": 1.7399211764357783e-05, "loss": 0.6349, "step": 1652 }, { "epoch": 0.62, "learning_rate": 1.739478248704635e-05, "loss": 0.5166, "step": 1653 }, { "epoch": 0.62, "learning_rate": 1.739035000610555e-05, "loss": 0.5653, "step": 1654 }, { "epoch": 0.62, "learning_rate": 1.7385914323455676e-05, "loss": 0.5678, "step": 1655 }, { "epoch": 0.62, "learning_rate": 1.7381475441018378e-05, "loss": 0.5564, "step": 1656 }, { "epoch": 0.62, "learning_rate": 1.737703336071671e-05, "loss": 0.6586, "step": 1657 }, { "epoch": 0.63, "learning_rate": 1.7372588084475105e-05, "loss": 0.6273, "step": 1658 }, { "epoch": 0.63, "learning_rate": 1.7368139614219393e-05, "loss": 0.5915, "step": 1659 }, { "epoch": 0.63, "learning_rate": 1.7363687951876763e-05, "loss": 0.5516, "step": 1660 }, { "epoch": 0.63, "learning_rate": 1.735923309937581e-05, "loss": 0.5831, "step": 1661 }, { "epoch": 0.63, "learning_rate": 1.73547750586465e-05, "loss": 0.579, "step": 1662 }, { "epoch": 0.63, "learning_rate": 1.7350313831620188e-05, "loss": 0.6048, "step": 1663 }, { "epoch": 0.63, "learning_rate": 1.7345849420229596e-05, "loss": 0.5969, "step": 1664 }, { "epoch": 0.63, "learning_rate": 1.7341381826408838e-05, "loss": 0.5499, "step": 1665 }, { "epoch": 0.63, "learning_rate": 1.7336911052093397e-05, "loss": 0.6328, "step": 1666 }, { "epoch": 0.63, "learning_rate": 1.7332437099220143e-05, "loss": 0.6103, "step": 1667 }, { "epoch": 0.63, "learning_rate": 1.7327959969727325e-05, "loss": 0.6402, "step": 1668 }, { "epoch": 0.63, "learning_rate": 1.7323479665554546e-05, "loss": 0.5694, "step": 1669 }, { "epoch": 0.63, "learning_rate": 1.731899618864282e-05, "loss": 0.5324, "step": 1670 }, { "epoch": 0.63, "learning_rate": 1.7314509540934505e-05, "loss": 0.6088, "step": 1671 }, { "epoch": 0.63, "learning_rate": 1.7310019724373348e-05, "loss": 0.5459, "step": 1672 }, { "epoch": 0.63, "learning_rate": 1.7305526740904462e-05, "loss": 0.6537, "step": 1673 }, { "epoch": 0.63, "learning_rate": 1.7301030592474333e-05, "loss": 0.5757, "step": 1674 }, { "epoch": 0.63, "learning_rate": 1.729653128103083e-05, "loss": 0.5364, "step": 1675 }, { "epoch": 0.63, "learning_rate": 1.729202880852317e-05, "loss": 0.5814, "step": 1676 }, { "epoch": 0.63, "learning_rate": 1.728752317690196e-05, "loss": 0.5912, "step": 1677 }, { "epoch": 0.63, "learning_rate": 1.728301438811916e-05, "loss": 0.5828, "step": 1678 }, { "epoch": 0.63, "learning_rate": 1.7278502444128112e-05, "loss": 0.6261, "step": 1679 }, { "epoch": 0.63, "learning_rate": 1.7273987346883518e-05, "loss": 0.6343, "step": 1680 }, { "epoch": 0.63, "learning_rate": 1.726946909834144e-05, "loss": 0.6412, "step": 1681 }, { "epoch": 0.63, "learning_rate": 1.7264947700459313e-05, "loss": 0.5953, "step": 1682 }, { "epoch": 0.63, "learning_rate": 1.726042315519594e-05, "loss": 0.6096, "step": 1683 }, { "epoch": 0.63, "learning_rate": 1.7255895464511477e-05, "loss": 0.6607, "step": 1684 }, { "epoch": 0.64, "learning_rate": 1.7251364630367453e-05, "loss": 0.6218, "step": 1685 }, { "epoch": 0.64, "learning_rate": 1.724683065472675e-05, "loss": 0.59, "step": 1686 }, { "epoch": 0.64, "learning_rate": 1.724229353955361e-05, "loss": 0.6015, "step": 1687 }, { "epoch": 0.64, "learning_rate": 1.7237753286813648e-05, "loss": 0.5697, "step": 1688 }, { "epoch": 0.64, "learning_rate": 1.7233209898473825e-05, "loss": 0.6198, "step": 1689 }, { "epoch": 0.64, "learning_rate": 1.722866337650247e-05, "loss": 0.5749, "step": 1690 }, { "epoch": 0.64, "learning_rate": 1.7224113722869266e-05, "loss": 0.5491, "step": 1691 }, { "epoch": 0.64, "learning_rate": 1.7219560939545246e-05, "loss": 0.56, "step": 1692 }, { "epoch": 0.64, "learning_rate": 1.721500502850281e-05, "loss": 0.5795, "step": 1693 }, { "epoch": 0.64, "learning_rate": 1.7210445991715702e-05, "loss": 0.5868, "step": 1694 }, { "epoch": 0.64, "learning_rate": 1.720588383115903e-05, "loss": 0.5305, "step": 1695 }, { "epoch": 0.64, "learning_rate": 1.7201318548809252e-05, "loss": 0.5905, "step": 1696 }, { "epoch": 0.64, "learning_rate": 1.7196750146644176e-05, "loss": 0.5723, "step": 1697 }, { "epoch": 0.64, "learning_rate": 1.7192178626642965e-05, "loss": 0.5868, "step": 1698 }, { "epoch": 0.64, "learning_rate": 1.7187603990786127e-05, "loss": 0.575, "step": 1699 }, { "epoch": 0.64, "learning_rate": 1.7183026241055527e-05, "loss": 0.579, "step": 1700 }, { "epoch": 0.64, "learning_rate": 1.717844537943438e-05, "loss": 0.5613, "step": 1701 }, { "epoch": 0.64, "learning_rate": 1.7173861407907234e-05, "loss": 0.5244, "step": 1702 }, { "epoch": 0.64, "learning_rate": 1.7169274328460008e-05, "loss": 0.6177, "step": 1703 }, { "epoch": 0.64, "learning_rate": 1.7164684143079943e-05, "loss": 0.57, "step": 1704 }, { "epoch": 0.64, "learning_rate": 1.7160090853755646e-05, "loss": 0.5975, "step": 1705 }, { "epoch": 0.64, "learning_rate": 1.7155494462477057e-05, "loss": 0.6871, "step": 1706 }, { "epoch": 0.64, "learning_rate": 1.7150894971235464e-05, "loss": 0.5873, "step": 1707 }, { "epoch": 0.64, "learning_rate": 1.71462923820235e-05, "loss": 0.5648, "step": 1708 }, { "epoch": 0.64, "learning_rate": 1.7141686696835128e-05, "loss": 0.5531, "step": 1709 }, { "epoch": 0.64, "learning_rate": 1.713707791766567e-05, "loss": 0.5797, "step": 1710 }, { "epoch": 0.65, "learning_rate": 1.7132466046511774e-05, "loss": 0.6009, "step": 1711 }, { "epoch": 0.65, "learning_rate": 1.7127851085371436e-05, "loss": 0.5862, "step": 1712 }, { "epoch": 0.65, "learning_rate": 1.712323303624399e-05, "loss": 0.5344, "step": 1713 }, { "epoch": 0.65, "learning_rate": 1.71186119011301e-05, "loss": 0.6017, "step": 1714 }, { "epoch": 0.65, "learning_rate": 1.711398768203178e-05, "loss": 0.621, "step": 1715 }, { "epoch": 0.65, "learning_rate": 1.7109360380952366e-05, "loss": 0.6089, "step": 1716 }, { "epoch": 0.65, "learning_rate": 1.710472999989654e-05, "loss": 0.6103, "step": 1717 }, { "epoch": 0.65, "learning_rate": 1.710009654087032e-05, "loss": 0.5346, "step": 1718 }, { "epoch": 0.65, "learning_rate": 1.7095460005881034e-05, "loss": 0.6003, "step": 1719 }, { "epoch": 0.65, "learning_rate": 1.7090820396937376e-05, "loss": 0.584, "step": 1720 }, { "epoch": 0.65, "learning_rate": 1.7086177716049354e-05, "loss": 0.5797, "step": 1721 }, { "epoch": 0.65, "learning_rate": 1.7081531965228305e-05, "loss": 0.5119, "step": 1722 }, { "epoch": 0.65, "learning_rate": 1.70768831464869e-05, "loss": 0.6123, "step": 1723 }, { "epoch": 0.65, "learning_rate": 1.707223126183914e-05, "loss": 0.6171, "step": 1724 }, { "epoch": 0.65, "learning_rate": 1.7067576313300357e-05, "loss": 0.5182, "step": 1725 }, { "epoch": 0.65, "learning_rate": 1.7062918302887203e-05, "loss": 0.5942, "step": 1726 }, { "epoch": 0.65, "learning_rate": 1.7058257232617658e-05, "loss": 0.587, "step": 1727 }, { "epoch": 0.65, "learning_rate": 1.705359310451103e-05, "loss": 0.5629, "step": 1728 }, { "epoch": 0.65, "learning_rate": 1.704892592058796e-05, "loss": 0.5967, "step": 1729 }, { "epoch": 0.65, "learning_rate": 1.7044255682870395e-05, "loss": 0.5775, "step": 1730 }, { "epoch": 0.65, "learning_rate": 1.703958239338162e-05, "loss": 0.624, "step": 1731 }, { "epoch": 0.65, "learning_rate": 1.703490605414623e-05, "loss": 0.5806, "step": 1732 }, { "epoch": 0.65, "learning_rate": 1.7030226667190156e-05, "loss": 0.5944, "step": 1733 }, { "epoch": 0.65, "learning_rate": 1.7025544234540633e-05, "loss": 0.615, "step": 1734 }, { "epoch": 0.65, "learning_rate": 1.702085875822623e-05, "loss": 0.5467, "step": 1735 }, { "epoch": 0.65, "learning_rate": 1.701617024027682e-05, "loss": 0.602, "step": 1736 }, { "epoch": 0.65, "learning_rate": 1.701147868272361e-05, "loss": 0.621, "step": 1737 }, { "epoch": 0.66, "learning_rate": 1.700678408759911e-05, "loss": 0.6049, "step": 1738 }, { "epoch": 0.66, "learning_rate": 1.7002086456937164e-05, "loss": 0.5427, "step": 1739 }, { "epoch": 0.66, "learning_rate": 1.69973857927729e-05, "loss": 0.6044, "step": 1740 }, { "epoch": 0.66, "learning_rate": 1.6992682097142795e-05, "loss": 0.5683, "step": 1741 }, { "epoch": 0.66, "learning_rate": 1.698797537208461e-05, "loss": 0.632, "step": 1742 }, { "epoch": 0.66, "learning_rate": 1.6983265619637444e-05, "loss": 0.5675, "step": 1743 }, { "epoch": 0.66, "learning_rate": 1.6978552841841688e-05, "loss": 0.5841, "step": 1744 }, { "epoch": 0.66, "learning_rate": 1.697383704073905e-05, "loss": 0.5565, "step": 1745 }, { "epoch": 0.66, "learning_rate": 1.696911821837255e-05, "loss": 0.59, "step": 1746 }, { "epoch": 0.66, "learning_rate": 1.6964396376786522e-05, "loss": 0.5724, "step": 1747 }, { "epoch": 0.66, "learning_rate": 1.695967151802659e-05, "loss": 0.5332, "step": 1748 }, { "epoch": 0.66, "learning_rate": 1.6954943644139707e-05, "loss": 0.5434, "step": 1749 }, { "epoch": 0.66, "learning_rate": 1.6950212757174117e-05, "loss": 0.6134, "step": 1750 }, { "epoch": 0.66, "learning_rate": 1.6945478859179375e-05, "loss": 0.5532, "step": 1751 }, { "epoch": 0.66, "learning_rate": 1.6940741952206342e-05, "loss": 0.6213, "step": 1752 }, { "epoch": 0.66, "learning_rate": 1.6936002038307174e-05, "loss": 0.5711, "step": 1753 }, { "epoch": 0.66, "learning_rate": 1.6931259119535342e-05, "loss": 0.6139, "step": 1754 }, { "epoch": 0.66, "learning_rate": 1.6926513197945613e-05, "loss": 0.6019, "step": 1755 }, { "epoch": 0.66, "learning_rate": 1.6921764275594054e-05, "loss": 0.5497, "step": 1756 }, { "epoch": 0.66, "learning_rate": 1.691701235453803e-05, "loss": 0.5736, "step": 1757 }, { "epoch": 0.66, "learning_rate": 1.691225743683621e-05, "loss": 0.5609, "step": 1758 }, { "epoch": 0.66, "learning_rate": 1.690749952454856e-05, "loss": 0.5669, "step": 1759 }, { "epoch": 0.66, "learning_rate": 1.690273861973634e-05, "loss": 0.5848, "step": 1760 }, { "epoch": 0.66, "learning_rate": 1.6897974724462108e-05, "loss": 0.5493, "step": 1761 }, { "epoch": 0.66, "learning_rate": 1.6893207840789723e-05, "loss": 0.6343, "step": 1762 }, { "epoch": 0.66, "learning_rate": 1.688843797078433e-05, "loss": 0.5558, "step": 1763 }, { "epoch": 0.67, "learning_rate": 1.6883665116512377e-05, "loss": 0.6257, "step": 1764 }, { "epoch": 0.67, "learning_rate": 1.6878889280041593e-05, "loss": 0.5425, "step": 1765 }, { "epoch": 0.67, "learning_rate": 1.687411046344101e-05, "loss": 0.6052, "step": 1766 }, { "epoch": 0.67, "learning_rate": 1.6869328668780943e-05, "loss": 0.6076, "step": 1767 }, { "epoch": 0.67, "learning_rate": 1.6864543898133007e-05, "loss": 0.5736, "step": 1768 }, { "epoch": 0.67, "learning_rate": 1.68597561535701e-05, "loss": 0.5945, "step": 1769 }, { "epoch": 0.67, "learning_rate": 1.68549654371664e-05, "loss": 0.5758, "step": 1770 }, { "epoch": 0.67, "learning_rate": 1.685017175099739e-05, "loss": 0.6092, "step": 1771 }, { "epoch": 0.67, "learning_rate": 1.684537509713983e-05, "loss": 0.5941, "step": 1772 }, { "epoch": 0.67, "learning_rate": 1.684057547767176e-05, "loss": 0.6198, "step": 1773 }, { "epoch": 0.67, "learning_rate": 1.6835772894672512e-05, "loss": 0.5502, "step": 1774 }, { "epoch": 0.67, "learning_rate": 1.683096735022271e-05, "loss": 0.6166, "step": 1775 }, { "epoch": 0.67, "learning_rate": 1.6826158846404246e-05, "loss": 0.607, "step": 1776 }, { "epoch": 0.67, "learning_rate": 1.6821347385300298e-05, "loss": 0.5881, "step": 1777 }, { "epoch": 0.67, "learning_rate": 1.681653296899533e-05, "loss": 0.5926, "step": 1778 }, { "epoch": 0.67, "learning_rate": 1.6811715599575085e-05, "loss": 0.627, "step": 1779 }, { "epoch": 0.67, "learning_rate": 1.680689527912658e-05, "loss": 0.6077, "step": 1780 }, { "epoch": 0.67, "learning_rate": 1.6802072009738117e-05, "loss": 0.5737, "step": 1781 }, { "epoch": 0.67, "learning_rate": 1.6797245793499275e-05, "loss": 0.6102, "step": 1782 }, { "epoch": 0.67, "learning_rate": 1.6792416632500904e-05, "loss": 0.6006, "step": 1783 }, { "epoch": 0.67, "learning_rate": 1.6787584528835132e-05, "loss": 0.6229, "step": 1784 }, { "epoch": 0.67, "learning_rate": 1.6782749484595367e-05, "loss": 0.6058, "step": 1785 }, { "epoch": 0.67, "learning_rate": 1.677791150187629e-05, "loss": 0.6249, "step": 1786 }, { "epoch": 0.67, "learning_rate": 1.6773070582773848e-05, "loss": 0.5373, "step": 1787 }, { "epoch": 0.67, "learning_rate": 1.6768226729385262e-05, "loss": 0.5241, "step": 1788 }, { "epoch": 0.67, "learning_rate": 1.676337994380903e-05, "loss": 0.6422, "step": 1789 }, { "epoch": 0.67, "learning_rate": 1.6758530228144917e-05, "loss": 0.5956, "step": 1790 }, { "epoch": 0.68, "learning_rate": 1.6753677584493956e-05, "loss": 0.6181, "step": 1791 }, { "epoch": 0.68, "learning_rate": 1.674882201495845e-05, "loss": 0.5988, "step": 1792 }, { "epoch": 0.68, "learning_rate": 1.674396352164197e-05, "loss": 0.6129, "step": 1793 }, { "epoch": 0.68, "learning_rate": 1.6739102106649355e-05, "loss": 0.5815, "step": 1794 }, { "epoch": 0.68, "learning_rate": 1.6734237772086704e-05, "loss": 0.5724, "step": 1795 }, { "epoch": 0.68, "learning_rate": 1.6729370520061385e-05, "loss": 0.5929, "step": 1796 }, { "epoch": 0.68, "learning_rate": 1.672450035268203e-05, "loss": 0.5737, "step": 1797 }, { "epoch": 0.68, "learning_rate": 1.6719627272058535e-05, "loss": 0.6195, "step": 1798 }, { "epoch": 0.68, "learning_rate": 1.6714751280302058e-05, "loss": 0.5758, "step": 1799 }, { "epoch": 0.68, "learning_rate": 1.6709872379525012e-05, "loss": 0.5943, "step": 1800 }, { "epoch": 0.68, "learning_rate": 1.6704990571841077e-05, "loss": 0.5635, "step": 1801 }, { "epoch": 0.68, "learning_rate": 1.670010585936519e-05, "loss": 0.5756, "step": 1802 }, { "epoch": 0.68, "learning_rate": 1.6695218244213553e-05, "loss": 0.5263, "step": 1803 }, { "epoch": 0.68, "learning_rate": 1.669032772850361e-05, "loss": 0.5823, "step": 1804 }, { "epoch": 0.68, "learning_rate": 1.6685434314354077e-05, "loss": 0.5944, "step": 1805 }, { "epoch": 0.68, "learning_rate": 1.668053800388492e-05, "loss": 0.5826, "step": 1806 }, { "epoch": 0.68, "learning_rate": 1.6675638799217355e-05, "loss": 0.5689, "step": 1807 }, { "epoch": 0.68, "learning_rate": 1.667073670247386e-05, "loss": 0.6047, "step": 1808 }, { "epoch": 0.68, "learning_rate": 1.6665831715778162e-05, "loss": 0.5892, "step": 1809 }, { "epoch": 0.68, "learning_rate": 1.666092384125524e-05, "loss": 0.6243, "step": 1810 }, { "epoch": 0.68, "learning_rate": 1.665601308103132e-05, "loss": 0.6404, "step": 1811 }, { "epoch": 0.68, "learning_rate": 1.665109943723389e-05, "loss": 0.5926, "step": 1812 }, { "epoch": 0.68, "learning_rate": 1.6646182911991674e-05, "loss": 0.5678, "step": 1813 }, { "epoch": 0.68, "learning_rate": 1.664126350743465e-05, "loss": 0.5595, "step": 1814 }, { "epoch": 0.68, "learning_rate": 1.6636341225694047e-05, "loss": 0.5777, "step": 1815 }, { "epoch": 0.68, "learning_rate": 1.663141606890234e-05, "loss": 0.6064, "step": 1816 }, { "epoch": 0.69, "learning_rate": 1.662648803919324e-05, "loss": 0.5564, "step": 1817 }, { "epoch": 0.69, "learning_rate": 1.662155713870171e-05, "loss": 0.6038, "step": 1818 }, { "epoch": 0.69, "learning_rate": 1.6616623369563957e-05, "loss": 0.595, "step": 1819 }, { "epoch": 0.69, "learning_rate": 1.661168673391743e-05, "loss": 0.5335, "step": 1820 }, { "epoch": 0.69, "learning_rate": 1.6606747233900816e-05, "loss": 0.5794, "step": 1821 }, { "epoch": 0.69, "learning_rate": 1.6601804871654052e-05, "loss": 0.5744, "step": 1822 }, { "epoch": 0.69, "learning_rate": 1.6596859649318308e-05, "loss": 0.5895, "step": 1823 }, { "epoch": 0.69, "learning_rate": 1.659191156903599e-05, "loss": 0.5893, "step": 1824 }, { "epoch": 0.69, "learning_rate": 1.6586960632950756e-05, "loss": 0.587, "step": 1825 }, { "epoch": 0.69, "learning_rate": 1.658200684320748e-05, "loss": 0.5773, "step": 1826 }, { "epoch": 0.69, "learning_rate": 1.6577050201952293e-05, "loss": 0.6206, "step": 1827 }, { "epoch": 0.69, "learning_rate": 1.6572090711332553e-05, "loss": 0.5796, "step": 1828 }, { "epoch": 0.69, "learning_rate": 1.6567128373496843e-05, "loss": 0.5605, "step": 1829 }, { "epoch": 0.69, "learning_rate": 1.6562163190594994e-05, "loss": 0.5585, "step": 1830 }, { "epoch": 0.69, "learning_rate": 1.6557195164778067e-05, "loss": 0.5839, "step": 1831 }, { "epoch": 0.69, "learning_rate": 1.655222429819835e-05, "loss": 0.6263, "step": 1832 }, { "epoch": 0.69, "learning_rate": 1.6547250593009355e-05, "loss": 0.5669, "step": 1833 }, { "epoch": 0.69, "learning_rate": 1.654227405136584e-05, "loss": 0.6054, "step": 1834 }, { "epoch": 0.69, "learning_rate": 1.6537294675423786e-05, "loss": 0.6212, "step": 1835 }, { "epoch": 0.69, "learning_rate": 1.6532312467340393e-05, "loss": 0.5873, "step": 1836 }, { "epoch": 0.69, "learning_rate": 1.65273274292741e-05, "loss": 0.6, "step": 1837 }, { "epoch": 0.69, "learning_rate": 1.6522339563384564e-05, "loss": 0.5726, "step": 1838 }, { "epoch": 0.69, "learning_rate": 1.6517348871832673e-05, "loss": 0.6172, "step": 1839 }, { "epoch": 0.69, "learning_rate": 1.6512355356780528e-05, "loss": 0.5809, "step": 1840 }, { "epoch": 0.69, "learning_rate": 1.6507359020391476e-05, "loss": 0.5717, "step": 1841 }, { "epoch": 0.69, "learning_rate": 1.6502359864830056e-05, "loss": 0.5635, "step": 1842 }, { "epoch": 0.69, "learning_rate": 1.649735789226205e-05, "loss": 0.564, "step": 1843 }, { "epoch": 0.7, "learning_rate": 1.649235310485446e-05, "loss": 0.5779, "step": 1844 }, { "epoch": 0.7, "learning_rate": 1.6487345504775492e-05, "loss": 0.5971, "step": 1845 }, { "epoch": 0.7, "learning_rate": 1.6482335094194586e-05, "loss": 0.6035, "step": 1846 }, { "epoch": 0.7, "learning_rate": 1.6477321875282394e-05, "loss": 0.4888, "step": 1847 }, { "epoch": 0.7, "learning_rate": 1.647230585021078e-05, "loss": 0.6, "step": 1848 }, { "epoch": 0.7, "learning_rate": 1.6467287021152834e-05, "loss": 0.6016, "step": 1849 }, { "epoch": 0.7, "learning_rate": 1.6462265390282852e-05, "loss": 0.5561, "step": 1850 }, { "epoch": 0.7, "learning_rate": 1.6457240959776344e-05, "loss": 0.6152, "step": 1851 }, { "epoch": 0.7, "learning_rate": 1.6452213731810043e-05, "loss": 0.5087, "step": 1852 }, { "epoch": 0.7, "learning_rate": 1.6447183708561882e-05, "loss": 0.5849, "step": 1853 }, { "epoch": 0.7, "learning_rate": 1.6442150892211008e-05, "loss": 0.5485, "step": 1854 }, { "epoch": 0.7, "learning_rate": 1.6437115284937784e-05, "loss": 0.5392, "step": 1855 }, { "epoch": 0.7, "learning_rate": 1.6432076888923774e-05, "loss": 0.6216, "step": 1856 }, { "epoch": 0.7, "learning_rate": 1.642703570635176e-05, "loss": 0.5848, "step": 1857 }, { "epoch": 0.7, "learning_rate": 1.642199173940571e-05, "loss": 0.5941, "step": 1858 }, { "epoch": 0.7, "learning_rate": 1.6416944990270832e-05, "loss": 0.5365, "step": 1859 }, { "epoch": 0.7, "learning_rate": 1.6411895461133513e-05, "loss": 0.6197, "step": 1860 }, { "epoch": 0.7, "learning_rate": 1.640684315418135e-05, "loss": 0.5738, "step": 1861 }, { "epoch": 0.7, "learning_rate": 1.6401788071603147e-05, "loss": 0.5816, "step": 1862 }, { "epoch": 0.7, "learning_rate": 1.6396730215588913e-05, "loss": 0.6559, "step": 1863 }, { "epoch": 0.7, "learning_rate": 1.639166958832985e-05, "loss": 0.6645, "step": 1864 }, { "epoch": 0.7, "learning_rate": 1.6386606192018367e-05, "loss": 0.6076, "step": 1865 }, { "epoch": 0.7, "learning_rate": 1.638154002884807e-05, "loss": 0.5483, "step": 1866 }, { "epoch": 0.7, "learning_rate": 1.6376471101013763e-05, "loss": 0.5764, "step": 1867 }, { "epoch": 0.7, "learning_rate": 1.637139941071145e-05, "loss": 0.572, "step": 1868 }, { "epoch": 0.7, "learning_rate": 1.6366324960138336e-05, "loss": 0.5718, "step": 1869 }, { "epoch": 0.71, "learning_rate": 1.636124775149281e-05, "loss": 0.5839, "step": 1870 }, { "epoch": 0.71, "learning_rate": 1.6356167786974467e-05, "loss": 0.5814, "step": 1871 }, { "epoch": 0.71, "learning_rate": 1.635108506878409e-05, "loss": 0.5836, "step": 1872 }, { "epoch": 0.71, "learning_rate": 1.6345999599123656e-05, "loss": 0.5712, "step": 1873 }, { "epoch": 0.71, "learning_rate": 1.634091138019633e-05, "loss": 0.5675, "step": 1874 }, { "epoch": 0.71, "learning_rate": 1.6335820414206486e-05, "loss": 0.6065, "step": 1875 }, { "epoch": 0.71, "learning_rate": 1.633072670335966e-05, "loss": 0.5764, "step": 1876 }, { "epoch": 0.71, "learning_rate": 1.63256302498626e-05, "loss": 0.6035, "step": 1877 }, { "epoch": 0.71, "learning_rate": 1.6320531055923235e-05, "loss": 0.6096, "step": 1878 }, { "epoch": 0.71, "learning_rate": 1.6315429123750676e-05, "loss": 0.5889, "step": 1879 }, { "epoch": 0.71, "learning_rate": 1.631032445555523e-05, "loss": 0.5773, "step": 1880 }, { "epoch": 0.71, "learning_rate": 1.6305217053548377e-05, "loss": 0.5512, "step": 1881 }, { "epoch": 0.71, "learning_rate": 1.6300106919942792e-05, "loss": 0.5785, "step": 1882 }, { "epoch": 0.71, "learning_rate": 1.629499405695233e-05, "loss": 0.5749, "step": 1883 }, { "epoch": 0.71, "learning_rate": 1.628987846679203e-05, "loss": 0.5437, "step": 1884 }, { "epoch": 0.71, "learning_rate": 1.6284760151678103e-05, "loss": 0.5936, "step": 1885 }, { "epoch": 0.71, "learning_rate": 1.627963911382796e-05, "loss": 0.5881, "step": 1886 }, { "epoch": 0.71, "learning_rate": 1.6274515355460167e-05, "loss": 0.5181, "step": 1887 }, { "epoch": 0.71, "learning_rate": 1.6269388878794488e-05, "loss": 0.5367, "step": 1888 }, { "epoch": 0.71, "learning_rate": 1.6264259686051863e-05, "loss": 0.6605, "step": 1889 }, { "epoch": 0.71, "learning_rate": 1.6259127779454394e-05, "loss": 0.5681, "step": 1890 }, { "epoch": 0.71, "learning_rate": 1.625399316122537e-05, "loss": 0.5299, "step": 1891 }, { "epoch": 0.71, "learning_rate": 1.624885583358926e-05, "loss": 0.6129, "step": 1892 }, { "epoch": 0.71, "learning_rate": 1.6243715798771696e-05, "loss": 0.5493, "step": 1893 }, { "epoch": 0.71, "learning_rate": 1.6238573058999484e-05, "loss": 0.6245, "step": 1894 }, { "epoch": 0.71, "learning_rate": 1.623342761650061e-05, "loss": 0.6347, "step": 1895 }, { "epoch": 0.71, "learning_rate": 1.622827947350422e-05, "loss": 0.5799, "step": 1896 }, { "epoch": 0.72, "learning_rate": 1.622312863224064e-05, "loss": 0.5834, "step": 1897 }, { "epoch": 0.72, "learning_rate": 1.6217975094941356e-05, "loss": 0.6363, "step": 1898 }, { "epoch": 0.72, "learning_rate": 1.621281886383903e-05, "loss": 0.6094, "step": 1899 }, { "epoch": 0.72, "learning_rate": 1.6207659941167485e-05, "loss": 0.6084, "step": 1900 }, { "epoch": 0.72, "learning_rate": 1.6202498329161717e-05, "loss": 0.6219, "step": 1901 }, { "epoch": 0.72, "learning_rate": 1.619733403005788e-05, "loss": 0.5589, "step": 1902 }, { "epoch": 0.72, "learning_rate": 1.619216704609329e-05, "loss": 0.5546, "step": 1903 }, { "epoch": 0.72, "learning_rate": 1.618699737950644e-05, "loss": 0.5913, "step": 1904 }, { "epoch": 0.72, "learning_rate": 1.6181825032536965e-05, "loss": 0.5845, "step": 1905 }, { "epoch": 0.72, "learning_rate": 1.617665000742568e-05, "loss": 0.5624, "step": 1906 }, { "epoch": 0.72, "learning_rate": 1.6171472306414554e-05, "loss": 0.5778, "step": 1907 }, { "epoch": 0.72, "learning_rate": 1.6166291931746708e-05, "loss": 0.6063, "step": 1908 }, { "epoch": 0.72, "learning_rate": 1.6161108885666434e-05, "loss": 0.5847, "step": 1909 }, { "epoch": 0.72, "learning_rate": 1.615592317041917e-05, "loss": 0.6306, "step": 1910 }, { "epoch": 0.72, "learning_rate": 1.615073478825151e-05, "loss": 0.5813, "step": 1911 }, { "epoch": 0.72, "learning_rate": 1.6145543741411216e-05, "loss": 0.6032, "step": 1912 }, { "epoch": 0.72, "learning_rate": 1.6140350032147196e-05, "loss": 0.6251, "step": 1913 }, { "epoch": 0.72, "learning_rate": 1.6135153662709507e-05, "loss": 0.5578, "step": 1914 }, { "epoch": 0.72, "learning_rate": 1.612995463534937e-05, "loss": 0.613, "step": 1915 }, { "epoch": 0.72, "learning_rate": 1.6124752952319143e-05, "loss": 0.5922, "step": 1916 }, { "epoch": 0.72, "learning_rate": 1.6119548615872352e-05, "loss": 0.6229, "step": 1917 }, { "epoch": 0.72, "learning_rate": 1.6114341628263657e-05, "loss": 0.5842, "step": 1918 }, { "epoch": 0.72, "learning_rate": 1.6109131991748875e-05, "loss": 0.6178, "step": 1919 }, { "epoch": 0.72, "learning_rate": 1.6103919708584967e-05, "loss": 0.5896, "step": 1920 }, { "epoch": 0.72, "learning_rate": 1.609870478103004e-05, "loss": 0.5511, "step": 1921 }, { "epoch": 0.72, "learning_rate": 1.609348721134335e-05, "loss": 0.6158, "step": 1922 }, { "epoch": 0.73, "learning_rate": 1.6088267001785298e-05, "loss": 0.5391, "step": 1923 }, { "epoch": 0.73, "learning_rate": 1.6083044154617423e-05, "loss": 0.5917, "step": 1924 }, { "epoch": 0.73, "learning_rate": 1.6077818672102412e-05, "loss": 0.5723, "step": 1925 }, { "epoch": 0.73, "learning_rate": 1.6072590556504097e-05, "loss": 0.5628, "step": 1926 }, { "epoch": 0.73, "learning_rate": 1.6067359810087436e-05, "loss": 0.575, "step": 1927 }, { "epoch": 0.73, "learning_rate": 1.6062126435118546e-05, "loss": 0.5223, "step": 1928 }, { "epoch": 0.73, "learning_rate": 1.605689043386467e-05, "loss": 0.6258, "step": 1929 }, { "epoch": 0.73, "learning_rate": 1.605165180859419e-05, "loss": 0.6297, "step": 1930 }, { "epoch": 0.73, "learning_rate": 1.6046410561576627e-05, "loss": 0.6248, "step": 1931 }, { "epoch": 0.73, "learning_rate": 1.6041166695082638e-05, "loss": 0.5919, "step": 1932 }, { "epoch": 0.73, "learning_rate": 1.6035920211384023e-05, "loss": 0.585, "step": 1933 }, { "epoch": 0.73, "learning_rate": 1.603067111275369e-05, "loss": 0.6311, "step": 1934 }, { "epoch": 0.73, "learning_rate": 1.6025419401465715e-05, "loss": 0.5575, "step": 1935 }, { "epoch": 0.73, "learning_rate": 1.602016507979528e-05, "loss": 0.5469, "step": 1936 }, { "epoch": 0.73, "learning_rate": 1.6014908150018703e-05, "loss": 0.6128, "step": 1937 }, { "epoch": 0.73, "learning_rate": 1.6009648614413446e-05, "loss": 0.602, "step": 1938 }, { "epoch": 0.73, "learning_rate": 1.6004386475258075e-05, "loss": 0.5841, "step": 1939 }, { "epoch": 0.73, "learning_rate": 1.5999121734832308e-05, "loss": 0.5186, "step": 1940 }, { "epoch": 0.73, "learning_rate": 1.5993854395416972e-05, "loss": 0.5925, "step": 1941 }, { "epoch": 0.73, "learning_rate": 1.5988584459294035e-05, "loss": 0.5685, "step": 1942 }, { "epoch": 0.73, "learning_rate": 1.598331192874658e-05, "loss": 0.6005, "step": 1943 }, { "epoch": 0.73, "learning_rate": 1.597803680605882e-05, "loss": 0.6479, "step": 1944 }, { "epoch": 0.73, "learning_rate": 1.5972759093516078e-05, "loss": 0.5671, "step": 1945 }, { "epoch": 0.73, "learning_rate": 1.5967478793404817e-05, "loss": 0.5887, "step": 1946 }, { "epoch": 0.73, "learning_rate": 1.596219590801261e-05, "loss": 0.5587, "step": 1947 }, { "epoch": 0.73, "learning_rate": 1.5956910439628152e-05, "loss": 0.598, "step": 1948 }, { "epoch": 0.73, "learning_rate": 1.5951622390541263e-05, "loss": 0.5836, "step": 1949 }, { "epoch": 0.74, "learning_rate": 1.594633176304287e-05, "loss": 0.624, "step": 1950 }, { "epoch": 0.74, "learning_rate": 1.594103855942502e-05, "loss": 0.6075, "step": 1951 }, { "epoch": 0.74, "learning_rate": 1.5935742781980887e-05, "loss": 0.571, "step": 1952 }, { "epoch": 0.74, "learning_rate": 1.5930444433004746e-05, "loss": 0.6093, "step": 1953 }, { "epoch": 0.74, "learning_rate": 1.5925143514791997e-05, "loss": 0.5555, "step": 1954 }, { "epoch": 0.74, "learning_rate": 1.5919840029639142e-05, "loss": 0.5716, "step": 1955 }, { "epoch": 0.74, "learning_rate": 1.5914533979843802e-05, "loss": 0.5846, "step": 1956 }, { "epoch": 0.74, "learning_rate": 1.5909225367704712e-05, "loss": 0.5764, "step": 1957 }, { "epoch": 0.74, "learning_rate": 1.5903914195521713e-05, "loss": 0.5497, "step": 1958 }, { "epoch": 0.74, "learning_rate": 1.5898600465595753e-05, "loss": 0.5829, "step": 1959 }, { "epoch": 0.74, "learning_rate": 1.589328418022889e-05, "loss": 0.5772, "step": 1960 }, { "epoch": 0.74, "learning_rate": 1.5887965341724294e-05, "loss": 0.576, "step": 1961 }, { "epoch": 0.74, "learning_rate": 1.588264395238623e-05, "loss": 0.5885, "step": 1962 }, { "epoch": 0.74, "learning_rate": 1.587732001452008e-05, "loss": 0.564, "step": 1963 }, { "epoch": 0.74, "learning_rate": 1.587199353043232e-05, "loss": 0.644, "step": 1964 }, { "epoch": 0.74, "learning_rate": 1.586666450243054e-05, "loss": 0.5057, "step": 1965 }, { "epoch": 0.74, "learning_rate": 1.586133293282342e-05, "loss": 0.6074, "step": 1966 }, { "epoch": 0.74, "learning_rate": 1.5855998823920752e-05, "loss": 0.5816, "step": 1967 }, { "epoch": 0.74, "learning_rate": 1.5850662178033417e-05, "loss": 0.6453, "step": 1968 }, { "epoch": 0.74, "learning_rate": 1.5845322997473408e-05, "loss": 0.5439, "step": 1969 }, { "epoch": 0.74, "learning_rate": 1.5839981284553806e-05, "loss": 0.5741, "step": 1970 }, { "epoch": 0.74, "learning_rate": 1.583463704158879e-05, "loss": 0.5859, "step": 1971 }, { "epoch": 0.74, "learning_rate": 1.5829290270893642e-05, "loss": 0.6057, "step": 1972 }, { "epoch": 0.74, "learning_rate": 1.5823940974784732e-05, "loss": 0.5969, "step": 1973 }, { "epoch": 0.74, "learning_rate": 1.581858915557953e-05, "loss": 0.5667, "step": 1974 }, { "epoch": 0.74, "learning_rate": 1.5813234815596593e-05, "loss": 0.5503, "step": 1975 }, { "epoch": 0.75, "learning_rate": 1.580787795715557e-05, "loss": 0.5686, "step": 1976 }, { "epoch": 0.75, "learning_rate": 1.5802518582577212e-05, "loss": 0.6054, "step": 1977 }, { "epoch": 0.75, "learning_rate": 1.5797156694183343e-05, "loss": 0.5354, "step": 1978 }, { "epoch": 0.75, "learning_rate": 1.579179229429689e-05, "loss": 0.614, "step": 1979 }, { "epoch": 0.75, "learning_rate": 1.578642538524186e-05, "loss": 0.6068, "step": 1980 }, { "epoch": 0.75, "learning_rate": 1.5781055969343357e-05, "loss": 0.532, "step": 1981 }, { "epoch": 0.75, "learning_rate": 1.577568404892756e-05, "loss": 0.5703, "step": 1982 }, { "epoch": 0.75, "learning_rate": 1.577030962632173e-05, "loss": 0.6071, "step": 1983 }, { "epoch": 0.75, "learning_rate": 1.576493270385423e-05, "loss": 0.4992, "step": 1984 }, { "epoch": 0.75, "learning_rate": 1.5759553283854492e-05, "loss": 0.6028, "step": 1985 }, { "epoch": 0.75, "learning_rate": 1.575417136865303e-05, "loss": 0.5916, "step": 1986 }, { "epoch": 0.75, "learning_rate": 1.5748786960581447e-05, "loss": 0.5563, "step": 1987 }, { "epoch": 0.75, "learning_rate": 1.5743400061972412e-05, "loss": 0.5689, "step": 1988 }, { "epoch": 0.75, "learning_rate": 1.5738010675159698e-05, "loss": 0.5581, "step": 1989 }, { "epoch": 0.75, "learning_rate": 1.5732618802478125e-05, "loss": 0.5486, "step": 1990 }, { "epoch": 0.75, "learning_rate": 1.5727224446263608e-05, "loss": 0.6332, "step": 1991 }, { "epoch": 0.75, "learning_rate": 1.5721827608853147e-05, "loss": 0.5323, "step": 1992 }, { "epoch": 0.75, "learning_rate": 1.5716428292584788e-05, "loss": 0.5425, "step": 1993 }, { "epoch": 0.75, "learning_rate": 1.571102649979768e-05, "loss": 0.5745, "step": 1994 }, { "epoch": 0.75, "learning_rate": 1.5705622232832025e-05, "loss": 0.5774, "step": 1995 }, { "epoch": 0.75, "learning_rate": 1.570021549402911e-05, "loss": 0.6113, "step": 1996 }, { "epoch": 0.75, "learning_rate": 1.5694806285731285e-05, "loss": 0.5771, "step": 1997 }, { "epoch": 0.75, "learning_rate": 1.5689394610281973e-05, "loss": 0.5428, "step": 1998 }, { "epoch": 0.75, "learning_rate": 1.5683980470025667e-05, "loss": 0.5448, "step": 1999 }, { "epoch": 0.75, "learning_rate": 1.5678563867307922e-05, "loss": 0.5711, "step": 2000 }, { "epoch": 0.75, "learning_rate": 1.5673144804475367e-05, "loss": 0.5799, "step": 2001 }, { "epoch": 0.75, "learning_rate": 1.566772328387569e-05, "loss": 0.6091, "step": 2002 }, { "epoch": 0.76, "learning_rate": 1.566229930785765e-05, "loss": 0.6222, "step": 2003 }, { "epoch": 0.76, "learning_rate": 1.565687287877107e-05, "loss": 0.5505, "step": 2004 }, { "epoch": 0.76, "learning_rate": 1.565144399896683e-05, "loss": 0.5379, "step": 2005 }, { "epoch": 0.76, "learning_rate": 1.5646012670796872e-05, "loss": 0.5967, "step": 2006 }, { "epoch": 0.76, "learning_rate": 1.56405788966142e-05, "loss": 0.5776, "step": 2007 }, { "epoch": 0.76, "learning_rate": 1.5635142678772892e-05, "loss": 0.5726, "step": 2008 }, { "epoch": 0.76, "learning_rate": 1.562970401962806e-05, "loss": 0.5734, "step": 2009 }, { "epoch": 0.76, "learning_rate": 1.5624262921535883e-05, "loss": 0.5751, "step": 2010 }, { "epoch": 0.76, "learning_rate": 1.5618819386853607e-05, "loss": 0.6112, "step": 2011 }, { "epoch": 0.76, "learning_rate": 1.5613373417939522e-05, "loss": 0.5577, "step": 2012 }, { "epoch": 0.76, "learning_rate": 1.560792501715297e-05, "loss": 0.5694, "step": 2013 }, { "epoch": 0.76, "learning_rate": 1.5602474186854368e-05, "loss": 0.5843, "step": 2014 }, { "epoch": 0.76, "learning_rate": 1.5597020929405154e-05, "loss": 0.5421, "step": 2015 }, { "epoch": 0.76, "learning_rate": 1.5591565247167845e-05, "loss": 0.5148, "step": 2016 }, { "epoch": 0.76, "learning_rate": 1.5586107142505994e-05, "loss": 0.5957, "step": 2017 }, { "epoch": 0.76, "learning_rate": 1.5580646617784202e-05, "loss": 0.5882, "step": 2018 }, { "epoch": 0.76, "learning_rate": 1.5575183675368127e-05, "loss": 0.6287, "step": 2019 }, { "epoch": 0.76, "learning_rate": 1.556971831762447e-05, "loss": 0.6214, "step": 2020 }, { "epoch": 0.76, "learning_rate": 1.556425054692098e-05, "loss": 0.5814, "step": 2021 }, { "epoch": 0.76, "learning_rate": 1.555878036562645e-05, "loss": 0.6095, "step": 2022 }, { "epoch": 0.76, "learning_rate": 1.5553307776110714e-05, "loss": 0.6098, "step": 2023 }, { "epoch": 0.76, "learning_rate": 1.554783278074466e-05, "loss": 0.5314, "step": 2024 }, { "epoch": 0.76, "learning_rate": 1.55423553819002e-05, "loss": 0.5802, "step": 2025 }, { "epoch": 0.76, "learning_rate": 1.5536875581950312e-05, "loss": 0.5348, "step": 2026 }, { "epoch": 0.76, "learning_rate": 1.5531393383268987e-05, "loss": 0.5636, "step": 2027 }, { "epoch": 0.76, "learning_rate": 1.5525908788231285e-05, "loss": 0.582, "step": 2028 }, { "epoch": 0.77, "learning_rate": 1.552042179921327e-05, "loss": 0.5838, "step": 2029 }, { "epoch": 0.77, "learning_rate": 1.5514932418592075e-05, "loss": 0.5561, "step": 2030 }, { "epoch": 0.77, "learning_rate": 1.5509440648745848e-05, "loss": 0.559, "step": 2031 }, { "epoch": 0.77, "learning_rate": 1.5503946492053784e-05, "loss": 0.5888, "step": 2032 }, { "epoch": 0.77, "learning_rate": 1.54984499508961e-05, "loss": 0.5722, "step": 2033 }, { "epoch": 0.77, "learning_rate": 1.5492951027654065e-05, "loss": 0.5645, "step": 2034 }, { "epoch": 0.77, "learning_rate": 1.5487449724709956e-05, "loss": 0.5434, "step": 2035 }, { "epoch": 0.77, "learning_rate": 1.54819460444471e-05, "loss": 0.595, "step": 2036 }, { "epoch": 0.77, "learning_rate": 1.5476439989249847e-05, "loss": 0.6222, "step": 2037 }, { "epoch": 0.77, "learning_rate": 1.5470931561503575e-05, "loss": 0.571, "step": 2038 }, { "epoch": 0.77, "learning_rate": 1.546542076359469e-05, "loss": 0.6055, "step": 2039 }, { "epoch": 0.77, "learning_rate": 1.5459907597910634e-05, "loss": 0.541, "step": 2040 }, { "epoch": 0.77, "learning_rate": 1.545439206683985e-05, "loss": 0.5675, "step": 2041 }, { "epoch": 0.77, "learning_rate": 1.544887417277184e-05, "loss": 0.5983, "step": 2042 }, { "epoch": 0.77, "learning_rate": 1.5443353918097104e-05, "loss": 0.5921, "step": 2043 }, { "epoch": 0.77, "learning_rate": 1.5437831305207172e-05, "loss": 0.5864, "step": 2044 }, { "epoch": 0.77, "learning_rate": 1.5432306336494597e-05, "loss": 0.5627, "step": 2045 }, { "epoch": 0.77, "learning_rate": 1.542677901435295e-05, "loss": 0.5849, "step": 2046 }, { "epoch": 0.77, "learning_rate": 1.5421249341176833e-05, "loss": 0.5622, "step": 2047 }, { "epoch": 0.77, "learning_rate": 1.541571731936185e-05, "loss": 0.6088, "step": 2048 }, { "epoch": 0.77, "learning_rate": 1.5410182951304625e-05, "loss": 0.584, "step": 2049 }, { "epoch": 0.77, "learning_rate": 1.540464623940281e-05, "loss": 0.5697, "step": 2050 }, { "epoch": 0.77, "learning_rate": 1.539910718605506e-05, "loss": 0.5333, "step": 2051 }, { "epoch": 0.77, "learning_rate": 1.539356579366106e-05, "loss": 0.5895, "step": 2052 }, { "epoch": 0.77, "learning_rate": 1.538802206462149e-05, "loss": 0.5919, "step": 2053 }, { "epoch": 0.77, "learning_rate": 1.5382476001338053e-05, "loss": 0.6176, "step": 2054 }, { "epoch": 0.77, "learning_rate": 1.537692760621346e-05, "loss": 0.5776, "step": 2055 }, { "epoch": 0.78, "learning_rate": 1.5371376881651432e-05, "loss": 0.5248, "step": 2056 }, { "epoch": 0.78, "learning_rate": 1.53658238300567e-05, "loss": 0.54, "step": 2057 }, { "epoch": 0.78, "learning_rate": 1.5360268453835007e-05, "loss": 0.615, "step": 2058 }, { "epoch": 0.78, "learning_rate": 1.5354710755393098e-05, "loss": 0.6354, "step": 2059 }, { "epoch": 0.78, "learning_rate": 1.5349150737138722e-05, "loss": 0.5717, "step": 2060 }, { "epoch": 0.78, "learning_rate": 1.534358840148064e-05, "loss": 0.5993, "step": 2061 }, { "epoch": 0.78, "learning_rate": 1.5338023750828613e-05, "loss": 0.6088, "step": 2062 }, { "epoch": 0.78, "learning_rate": 1.533245678759341e-05, "loss": 0.5852, "step": 2063 }, { "epoch": 0.78, "learning_rate": 1.5326887514186785e-05, "loss": 0.5689, "step": 2064 }, { "epoch": 0.78, "learning_rate": 1.5321315933021516e-05, "loss": 0.6137, "step": 2065 }, { "epoch": 0.78, "learning_rate": 1.5315742046511367e-05, "loss": 0.5728, "step": 2066 }, { "epoch": 0.78, "learning_rate": 1.5310165857071096e-05, "loss": 0.589, "step": 2067 }, { "epoch": 0.78, "learning_rate": 1.530458736711648e-05, "loss": 0.589, "step": 2068 }, { "epoch": 0.78, "learning_rate": 1.5299006579064273e-05, "loss": 0.5366, "step": 2069 }, { "epoch": 0.78, "learning_rate": 1.5293423495332228e-05, "loss": 0.5833, "step": 2070 }, { "epoch": 0.78, "learning_rate": 1.5287838118339092e-05, "loss": 0.5573, "step": 2071 }, { "epoch": 0.78, "learning_rate": 1.528225045050462e-05, "loss": 0.5713, "step": 2072 }, { "epoch": 0.78, "learning_rate": 1.5276660494249535e-05, "loss": 0.5465, "step": 2073 }, { "epoch": 0.78, "learning_rate": 1.5271068251995577e-05, "loss": 0.5563, "step": 2074 }, { "epoch": 0.78, "learning_rate": 1.5265473726165456e-05, "loss": 0.6104, "step": 2075 }, { "epoch": 0.78, "learning_rate": 1.525987691918288e-05, "loss": 0.6028, "step": 2076 }, { "epoch": 0.78, "learning_rate": 1.5254277833472544e-05, "loss": 0.5848, "step": 2077 }, { "epoch": 0.78, "learning_rate": 1.5248676471460137e-05, "loss": 0.599, "step": 2078 }, { "epoch": 0.78, "learning_rate": 1.5243072835572319e-05, "loss": 0.6279, "step": 2079 }, { "epoch": 0.78, "learning_rate": 1.5237466928236745e-05, "loss": 0.5709, "step": 2080 }, { "epoch": 0.78, "learning_rate": 1.5231858751882063e-05, "loss": 0.5997, "step": 2081 }, { "epoch": 0.79, "learning_rate": 1.5226248308937881e-05, "loss": 0.5596, "step": 2082 }, { "epoch": 0.79, "learning_rate": 1.5220635601834807e-05, "loss": 0.5632, "step": 2083 }, { "epoch": 0.79, "learning_rate": 1.5215020633004427e-05, "loss": 0.5698, "step": 2084 }, { "epoch": 0.79, "learning_rate": 1.5209403404879305e-05, "loss": 0.5862, "step": 2085 }, { "epoch": 0.79, "learning_rate": 1.5203783919892978e-05, "loss": 0.6192, "step": 2086 }, { "epoch": 0.79, "learning_rate": 1.5198162180479972e-05, "loss": 0.5509, "step": 2087 }, { "epoch": 0.79, "learning_rate": 1.5192538189075778e-05, "loss": 0.5257, "step": 2088 }, { "epoch": 0.79, "learning_rate": 1.5186911948116874e-05, "loss": 0.5535, "step": 2089 }, { "epoch": 0.79, "learning_rate": 1.5181283460040705e-05, "loss": 0.5759, "step": 2090 }, { "epoch": 0.79, "learning_rate": 1.517565272728569e-05, "loss": 0.5731, "step": 2091 }, { "epoch": 0.79, "learning_rate": 1.5170019752291225e-05, "loss": 0.5456, "step": 2092 }, { "epoch": 0.79, "learning_rate": 1.5164384537497672e-05, "loss": 0.6294, "step": 2093 }, { "epoch": 0.79, "learning_rate": 1.5158747085346369e-05, "loss": 0.578, "step": 2094 }, { "epoch": 0.79, "learning_rate": 1.5153107398279615e-05, "loss": 0.5948, "step": 2095 }, { "epoch": 0.79, "learning_rate": 1.5147465478740687e-05, "loss": 0.6368, "step": 2096 }, { "epoch": 0.79, "learning_rate": 1.5141821329173817e-05, "loss": 0.6155, "step": 2097 }, { "epoch": 0.79, "learning_rate": 1.5136174952024219e-05, "loss": 0.5853, "step": 2098 }, { "epoch": 0.79, "learning_rate": 1.5130526349738062e-05, "loss": 0.5915, "step": 2099 }, { "epoch": 0.79, "learning_rate": 1.5124875524762479e-05, "loss": 0.4768, "step": 2100 }, { "epoch": 0.79, "learning_rate": 1.5119222479545566e-05, "loss": 0.5749, "step": 2101 }, { "epoch": 0.79, "learning_rate": 1.5113567216536384e-05, "loss": 0.5614, "step": 2102 }, { "epoch": 0.79, "learning_rate": 1.5107909738184957e-05, "loss": 0.6014, "step": 2103 }, { "epoch": 0.79, "learning_rate": 1.5102250046942257e-05, "loss": 0.5219, "step": 2104 }, { "epoch": 0.79, "learning_rate": 1.5096588145260227e-05, "loss": 0.5933, "step": 2105 }, { "epoch": 0.79, "learning_rate": 1.5090924035591768e-05, "loss": 0.6136, "step": 2106 }, { "epoch": 0.79, "learning_rate": 1.5085257720390722e-05, "loss": 0.5813, "step": 2107 }, { "epoch": 0.79, "learning_rate": 1.507958920211191e-05, "loss": 0.5745, "step": 2108 }, { "epoch": 0.8, "learning_rate": 1.507391848321108e-05, "loss": 0.5661, "step": 2109 }, { "epoch": 0.8, "learning_rate": 1.5068245566144964e-05, "loss": 0.5908, "step": 2110 }, { "epoch": 0.8, "learning_rate": 1.5062570453371217e-05, "loss": 0.6066, "step": 2111 }, { "epoch": 0.8, "learning_rate": 1.5056893147348466e-05, "loss": 0.5684, "step": 2112 }, { "epoch": 0.8, "learning_rate": 1.5051213650536282e-05, "loss": 0.5528, "step": 2113 }, { "epoch": 0.8, "learning_rate": 1.504553196539518e-05, "loss": 0.5859, "step": 2114 }, { "epoch": 0.8, "learning_rate": 1.5039848094386629e-05, "loss": 0.6457, "step": 2115 }, { "epoch": 0.8, "learning_rate": 1.5034162039973042e-05, "loss": 0.5979, "step": 2116 }, { "epoch": 0.8, "learning_rate": 1.5028473804617779e-05, "loss": 0.6035, "step": 2117 }, { "epoch": 0.8, "learning_rate": 1.5022783390785143e-05, "loss": 0.5684, "step": 2118 }, { "epoch": 0.8, "learning_rate": 1.5017090800940386e-05, "loss": 0.5454, "step": 2119 }, { "epoch": 0.8, "learning_rate": 1.50113960375497e-05, "loss": 0.5775, "step": 2120 }, { "epoch": 0.8, "learning_rate": 1.5005699103080211e-05, "loss": 0.6257, "step": 2121 }, { "epoch": 0.8, "learning_rate": 1.5000000000000002e-05, "loss": 0.6147, "step": 2122 }, { "epoch": 0.8, "learning_rate": 1.4994298730778078e-05, "loss": 0.5664, "step": 2123 }, { "epoch": 0.8, "learning_rate": 1.4988595297884398e-05, "loss": 0.5878, "step": 2124 }, { "epoch": 0.8, "learning_rate": 1.4982889703789845e-05, "loss": 0.5806, "step": 2125 }, { "epoch": 0.8, "learning_rate": 1.4977181950966242e-05, "loss": 0.5566, "step": 2126 }, { "epoch": 0.8, "learning_rate": 1.4971472041886354e-05, "loss": 0.5549, "step": 2127 }, { "epoch": 0.8, "learning_rate": 1.4965759979023877e-05, "loss": 0.5493, "step": 2128 }, { "epoch": 0.8, "learning_rate": 1.4960045764853432e-05, "loss": 0.5406, "step": 2129 }, { "epoch": 0.8, "learning_rate": 1.4954329401850585e-05, "loss": 0.5844, "step": 2130 }, { "epoch": 0.8, "learning_rate": 1.4948610892491823e-05, "loss": 0.5666, "step": 2131 }, { "epoch": 0.8, "learning_rate": 1.4942890239254571e-05, "loss": 0.5496, "step": 2132 }, { "epoch": 0.8, "learning_rate": 1.4937167444617168e-05, "loss": 0.5562, "step": 2133 }, { "epoch": 0.8, "learning_rate": 1.49314425110589e-05, "loss": 0.5382, "step": 2134 }, { "epoch": 0.81, "learning_rate": 1.4925715441059972e-05, "loss": 0.5576, "step": 2135 }, { "epoch": 0.81, "learning_rate": 1.4919986237101501e-05, "loss": 0.5805, "step": 2136 }, { "epoch": 0.81, "learning_rate": 1.491425490166555e-05, "loss": 0.5797, "step": 2137 }, { "epoch": 0.81, "learning_rate": 1.4908521437235094e-05, "loss": 0.6016, "step": 2138 }, { "epoch": 0.81, "learning_rate": 1.4902785846294036e-05, "loss": 0.5666, "step": 2139 }, { "epoch": 0.81, "learning_rate": 1.4897048131327193e-05, "loss": 0.5756, "step": 2140 }, { "epoch": 0.81, "learning_rate": 1.4891308294820301e-05, "loss": 0.5771, "step": 2141 }, { "epoch": 0.81, "learning_rate": 1.4885566339260027e-05, "loss": 0.5794, "step": 2142 }, { "epoch": 0.81, "learning_rate": 1.4879822267133945e-05, "loss": 0.5753, "step": 2143 }, { "epoch": 0.81, "learning_rate": 1.4874076080930552e-05, "loss": 0.5245, "step": 2144 }, { "epoch": 0.81, "learning_rate": 1.4868327783139258e-05, "loss": 0.5414, "step": 2145 }, { "epoch": 0.81, "learning_rate": 1.4862577376250391e-05, "loss": 0.5779, "step": 2146 }, { "epoch": 0.81, "learning_rate": 1.4856824862755185e-05, "loss": 0.5582, "step": 2147 }, { "epoch": 0.81, "learning_rate": 1.4851070245145796e-05, "loss": 0.6099, "step": 2148 }, { "epoch": 0.81, "learning_rate": 1.4845313525915287e-05, "loss": 0.5514, "step": 2149 }, { "epoch": 0.81, "learning_rate": 1.483955470755763e-05, "loss": 0.6073, "step": 2150 }, { "epoch": 0.81, "learning_rate": 1.4833793792567705e-05, "loss": 0.5604, "step": 2151 }, { "epoch": 0.81, "learning_rate": 1.4828030783441313e-05, "loss": 0.5518, "step": 2152 }, { "epoch": 0.81, "learning_rate": 1.4822265682675148e-05, "loss": 0.5563, "step": 2153 }, { "epoch": 0.81, "learning_rate": 1.4816498492766815e-05, "loss": 0.5853, "step": 2154 }, { "epoch": 0.81, "learning_rate": 1.481072921621482e-05, "loss": 0.5572, "step": 2155 }, { "epoch": 0.81, "learning_rate": 1.4804957855518585e-05, "loss": 0.6465, "step": 2156 }, { "epoch": 0.81, "learning_rate": 1.4799184413178425e-05, "loss": 0.5525, "step": 2157 }, { "epoch": 0.81, "learning_rate": 1.4793408891695551e-05, "loss": 0.594, "step": 2158 }, { "epoch": 0.81, "learning_rate": 1.4787631293572094e-05, "loss": 0.61, "step": 2159 }, { "epoch": 0.81, "learning_rate": 1.4781851621311063e-05, "loss": 0.5468, "step": 2160 }, { "epoch": 0.81, "learning_rate": 1.4776069877416386e-05, "loss": 0.5516, "step": 2161 }, { "epoch": 0.82, "learning_rate": 1.477028606439287e-05, "loss": 0.6244, "step": 2162 }, { "epoch": 0.82, "learning_rate": 1.4764500184746231e-05, "loss": 0.5326, "step": 2163 }, { "epoch": 0.82, "learning_rate": 1.4758712240983073e-05, "loss": 0.5645, "step": 2164 }, { "epoch": 0.82, "learning_rate": 1.47529222356109e-05, "loss": 0.6416, "step": 2165 }, { "epoch": 0.82, "learning_rate": 1.4747130171138106e-05, "loss": 0.5333, "step": 2166 }, { "epoch": 0.82, "learning_rate": 1.4741336050073978e-05, "loss": 0.6244, "step": 2167 }, { "epoch": 0.82, "learning_rate": 1.4735539874928693e-05, "loss": 0.5359, "step": 2168 }, { "epoch": 0.82, "learning_rate": 1.4729741648213316e-05, "loss": 0.5773, "step": 2169 }, { "epoch": 0.82, "learning_rate": 1.4723941372439803e-05, "loss": 0.5852, "step": 2170 }, { "epoch": 0.82, "learning_rate": 1.4718139050121005e-05, "loss": 0.6402, "step": 2171 }, { "epoch": 0.82, "learning_rate": 1.4712334683770644e-05, "loss": 0.5722, "step": 2172 }, { "epoch": 0.82, "learning_rate": 1.470652827590334e-05, "loss": 0.5896, "step": 2173 }, { "epoch": 0.82, "learning_rate": 1.4700719829034592e-05, "loss": 0.5662, "step": 2174 }, { "epoch": 0.82, "learning_rate": 1.4694909345680788e-05, "loss": 0.5467, "step": 2175 }, { "epoch": 0.82, "learning_rate": 1.4689096828359188e-05, "loss": 0.6072, "step": 2176 }, { "epoch": 0.82, "learning_rate": 1.468328227958794e-05, "loss": 0.5907, "step": 2177 }, { "epoch": 0.82, "learning_rate": 1.4677465701886078e-05, "loss": 0.5404, "step": 2178 }, { "epoch": 0.82, "learning_rate": 1.46716470977735e-05, "loss": 0.5815, "step": 2179 }, { "epoch": 0.82, "learning_rate": 1.4665826469770994e-05, "loss": 0.5463, "step": 2180 }, { "epoch": 0.82, "learning_rate": 1.4660003820400222e-05, "loss": 0.574, "step": 2181 }, { "epoch": 0.82, "learning_rate": 1.4654179152183721e-05, "loss": 0.5865, "step": 2182 }, { "epoch": 0.82, "learning_rate": 1.4648352467644897e-05, "loss": 0.575, "step": 2183 }, { "epoch": 0.82, "learning_rate": 1.464252376930804e-05, "loss": 0.5922, "step": 2184 }, { "epoch": 0.82, "learning_rate": 1.4636693059698309e-05, "loss": 0.5269, "step": 2185 }, { "epoch": 0.82, "learning_rate": 1.4630860341341728e-05, "loss": 0.5364, "step": 2186 }, { "epoch": 0.82, "learning_rate": 1.4625025616765199e-05, "loss": 0.5541, "step": 2187 }, { "epoch": 0.83, "learning_rate": 1.4619188888496485e-05, "loss": 0.5956, "step": 2188 }, { "epoch": 0.83, "learning_rate": 1.461335015906423e-05, "loss": 0.5929, "step": 2189 }, { "epoch": 0.83, "learning_rate": 1.4607509430997931e-05, "loss": 0.5484, "step": 2190 }, { "epoch": 0.83, "learning_rate": 1.4601666706827959e-05, "loss": 0.6544, "step": 2191 }, { "epoch": 0.83, "learning_rate": 1.4595821989085549e-05, "loss": 0.5995, "step": 2192 }, { "epoch": 0.83, "learning_rate": 1.4589975280302793e-05, "loss": 0.5731, "step": 2193 }, { "epoch": 0.83, "learning_rate": 1.4584126583012658e-05, "loss": 0.5774, "step": 2194 }, { "epoch": 0.83, "learning_rate": 1.4578275899748962e-05, "loss": 0.5535, "step": 2195 }, { "epoch": 0.83, "learning_rate": 1.4572423233046386e-05, "loss": 0.56, "step": 2196 }, { "epoch": 0.83, "learning_rate": 1.4566568585440475e-05, "loss": 0.5534, "step": 2197 }, { "epoch": 0.83, "learning_rate": 1.4560711959467624e-05, "loss": 0.5717, "step": 2198 }, { "epoch": 0.83, "learning_rate": 1.4554853357665088e-05, "loss": 0.5439, "step": 2199 }, { "epoch": 0.83, "learning_rate": 1.4548992782570988e-05, "loss": 0.5768, "step": 2200 }, { "epoch": 0.83, "learning_rate": 1.454313023672428e-05, "loss": 0.6118, "step": 2201 }, { "epoch": 0.83, "learning_rate": 1.4537265722664793e-05, "loss": 0.5663, "step": 2202 }, { "epoch": 0.83, "learning_rate": 1.4531399242933198e-05, "loss": 0.5775, "step": 2203 }, { "epoch": 0.83, "learning_rate": 1.4525530800071022e-05, "loss": 0.4963, "step": 2204 }, { "epoch": 0.83, "learning_rate": 1.4519660396620636e-05, "loss": 0.5934, "step": 2205 }, { "epoch": 0.83, "learning_rate": 1.4513788035125269e-05, "loss": 0.5757, "step": 2206 }, { "epoch": 0.83, "learning_rate": 1.4507913718129e-05, "loss": 0.6323, "step": 2207 }, { "epoch": 0.83, "learning_rate": 1.4502037448176734e-05, "loss": 0.563, "step": 2208 }, { "epoch": 0.83, "learning_rate": 1.4496159227814253e-05, "loss": 0.5954, "step": 2209 }, { "epoch": 0.83, "learning_rate": 1.449027905958816e-05, "loss": 0.567, "step": 2210 }, { "epoch": 0.83, "learning_rate": 1.4484396946045915e-05, "loss": 0.6038, "step": 2211 }, { "epoch": 0.83, "learning_rate": 1.4478512889735813e-05, "loss": 0.5479, "step": 2212 }, { "epoch": 0.83, "learning_rate": 1.4472626893206997e-05, "loss": 0.5626, "step": 2213 }, { "epoch": 0.83, "learning_rate": 1.4466738959009448e-05, "loss": 0.5561, "step": 2214 }, { "epoch": 0.84, "learning_rate": 1.4460849089693982e-05, "loss": 0.6199, "step": 2215 }, { "epoch": 0.84, "learning_rate": 1.4454957287812258e-05, "loss": 0.5516, "step": 2216 }, { "epoch": 0.84, "learning_rate": 1.4449063555916774e-05, "loss": 0.5726, "step": 2217 }, { "epoch": 0.84, "learning_rate": 1.4443167896560863e-05, "loss": 0.5826, "step": 2218 }, { "epoch": 0.84, "learning_rate": 1.4437270312298687e-05, "loss": 0.5271, "step": 2219 }, { "epoch": 0.84, "learning_rate": 1.4431370805685249e-05, "loss": 0.5642, "step": 2220 }, { "epoch": 0.84, "learning_rate": 1.4425469379276385e-05, "loss": 0.5636, "step": 2221 }, { "epoch": 0.84, "learning_rate": 1.4419566035628756e-05, "loss": 0.5295, "step": 2222 }, { "epoch": 0.84, "learning_rate": 1.441366077729986e-05, "loss": 0.5619, "step": 2223 }, { "epoch": 0.84, "learning_rate": 1.4407753606848018e-05, "loss": 0.6116, "step": 2224 }, { "epoch": 0.84, "learning_rate": 1.4401844526832394e-05, "loss": 0.55, "step": 2225 }, { "epoch": 0.84, "learning_rate": 1.4395933539812958e-05, "loss": 0.6293, "step": 2226 }, { "epoch": 0.84, "learning_rate": 1.439002064835052e-05, "loss": 0.5407, "step": 2227 }, { "epoch": 0.84, "learning_rate": 1.4384105855006718e-05, "loss": 0.535, "step": 2228 }, { "epoch": 0.84, "learning_rate": 1.4378189162344002e-05, "loss": 0.5717, "step": 2229 }, { "epoch": 0.84, "learning_rate": 1.4372270572925653e-05, "loss": 0.5858, "step": 2230 }, { "epoch": 0.84, "learning_rate": 1.436635008931577e-05, "loss": 0.5589, "step": 2231 }, { "epoch": 0.84, "learning_rate": 1.4360427714079279e-05, "loss": 0.6038, "step": 2232 }, { "epoch": 0.84, "learning_rate": 1.4354503449781914e-05, "loss": 0.5643, "step": 2233 }, { "epoch": 0.84, "learning_rate": 1.4348577298990239e-05, "loss": 0.5954, "step": 2234 }, { "epoch": 0.84, "learning_rate": 1.4342649264271628e-05, "loss": 0.5722, "step": 2235 }, { "epoch": 0.84, "learning_rate": 1.4336719348194277e-05, "loss": 0.5364, "step": 2236 }, { "epoch": 0.84, "learning_rate": 1.433078755332719e-05, "loss": 0.5666, "step": 2237 }, { "epoch": 0.84, "learning_rate": 1.432485388224019e-05, "loss": 0.5511, "step": 2238 }, { "epoch": 0.84, "learning_rate": 1.4318918337503914e-05, "loss": 0.6027, "step": 2239 }, { "epoch": 0.84, "learning_rate": 1.4312980921689805e-05, "loss": 0.5079, "step": 2240 }, { "epoch": 0.85, "learning_rate": 1.430704163737012e-05, "loss": 0.5627, "step": 2241 }, { "epoch": 0.85, "learning_rate": 1.4301100487117926e-05, "loss": 0.5876, "step": 2242 }, { "epoch": 0.85, "learning_rate": 1.4295157473507097e-05, "loss": 0.5799, "step": 2243 }, { "epoch": 0.85, "learning_rate": 1.4289212599112316e-05, "loss": 0.6002, "step": 2244 }, { "epoch": 0.85, "learning_rate": 1.4283265866509072e-05, "loss": 0.5454, "step": 2245 }, { "epoch": 0.85, "learning_rate": 1.4277317278273657e-05, "loss": 0.5592, "step": 2246 }, { "epoch": 0.85, "learning_rate": 1.4271366836983166e-05, "loss": 0.5788, "step": 2247 }, { "epoch": 0.85, "learning_rate": 1.4265414545215503e-05, "loss": 0.5622, "step": 2248 }, { "epoch": 0.85, "learning_rate": 1.4259460405549363e-05, "loss": 0.5823, "step": 2249 }, { "epoch": 0.85, "learning_rate": 1.4253504420564256e-05, "loss": 0.5799, "step": 2250 }, { "epoch": 0.85, "learning_rate": 1.424754659284048e-05, "loss": 0.5518, "step": 2251 }, { "epoch": 0.85, "learning_rate": 1.4241586924959132e-05, "loss": 0.5615, "step": 2252 }, { "epoch": 0.85, "learning_rate": 1.4235625419502113e-05, "loss": 0.6144, "step": 2253 }, { "epoch": 0.85, "learning_rate": 1.4229662079052116e-05, "loss": 0.5732, "step": 2254 }, { "epoch": 0.85, "learning_rate": 1.4223696906192625e-05, "loss": 0.6303, "step": 2255 }, { "epoch": 0.85, "learning_rate": 1.4217729903507926e-05, "loss": 0.5736, "step": 2256 }, { "epoch": 0.85, "learning_rate": 1.4211761073583092e-05, "loss": 0.5647, "step": 2257 }, { "epoch": 0.85, "learning_rate": 1.420579041900399e-05, "loss": 0.5787, "step": 2258 }, { "epoch": 0.85, "learning_rate": 1.4199817942357277e-05, "loss": 0.619, "step": 2259 }, { "epoch": 0.85, "learning_rate": 1.41938436462304e-05, "loss": 0.5623, "step": 2260 }, { "epoch": 0.85, "learning_rate": 1.4187867533211592e-05, "loss": 0.6367, "step": 2261 }, { "epoch": 0.85, "learning_rate": 1.418188960588987e-05, "loss": 0.5991, "step": 2262 }, { "epoch": 0.85, "learning_rate": 1.417590986685505e-05, "loss": 0.5299, "step": 2263 }, { "epoch": 0.85, "learning_rate": 1.4169928318697721e-05, "loss": 0.5874, "step": 2264 }, { "epoch": 0.85, "learning_rate": 1.4163944964009258e-05, "loss": 0.551, "step": 2265 }, { "epoch": 0.85, "learning_rate": 1.415795980538182e-05, "loss": 0.5875, "step": 2266 }, { "epoch": 0.85, "learning_rate": 1.4151972845408348e-05, "loss": 0.6187, "step": 2267 }, { "epoch": 0.86, "learning_rate": 1.4145984086682563e-05, "loss": 0.564, "step": 2268 }, { "epoch": 0.86, "learning_rate": 1.4139993531798966e-05, "loss": 0.5826, "step": 2269 }, { "epoch": 0.86, "learning_rate": 1.4134001183352833e-05, "loss": 0.5868, "step": 2270 }, { "epoch": 0.86, "learning_rate": 1.4128007043940217e-05, "loss": 0.6224, "step": 2271 }, { "epoch": 0.86, "learning_rate": 1.4122011116157956e-05, "loss": 0.6209, "step": 2272 }, { "epoch": 0.86, "learning_rate": 1.4116013402603653e-05, "loss": 0.5439, "step": 2273 }, { "epoch": 0.86, "learning_rate": 1.4110013905875682e-05, "loss": 0.5637, "step": 2274 }, { "epoch": 0.86, "learning_rate": 1.4104012628573203e-05, "loss": 0.5816, "step": 2275 }, { "epoch": 0.86, "learning_rate": 1.4098009573296137e-05, "loss": 0.5964, "step": 2276 }, { "epoch": 0.86, "learning_rate": 1.4092004742645172e-05, "loss": 0.5814, "step": 2277 }, { "epoch": 0.86, "learning_rate": 1.4085998139221777e-05, "loss": 0.5938, "step": 2278 }, { "epoch": 0.86, "learning_rate": 1.4079989765628184e-05, "loss": 0.5879, "step": 2279 }, { "epoch": 0.86, "learning_rate": 1.4073979624467384e-05, "loss": 0.6135, "step": 2280 }, { "epoch": 0.86, "learning_rate": 1.4067967718343148e-05, "loss": 0.6095, "step": 2281 }, { "epoch": 0.86, "learning_rate": 1.4061954049860001e-05, "loss": 0.6246, "step": 2282 }, { "epoch": 0.86, "learning_rate": 1.4055938621623236e-05, "loss": 0.6423, "step": 2283 }, { "epoch": 0.86, "learning_rate": 1.4049921436238898e-05, "loss": 0.5842, "step": 2284 }, { "epoch": 0.86, "learning_rate": 1.4043902496313812e-05, "loss": 0.6207, "step": 2285 }, { "epoch": 0.86, "learning_rate": 1.4037881804455555e-05, "loss": 0.5835, "step": 2286 }, { "epoch": 0.86, "learning_rate": 1.4031859363272456e-05, "loss": 0.6008, "step": 2287 }, { "epoch": 0.86, "learning_rate": 1.4025835175373606e-05, "loss": 0.5861, "step": 2288 }, { "epoch": 0.86, "learning_rate": 1.4019809243368858e-05, "loss": 0.5433, "step": 2289 }, { "epoch": 0.86, "learning_rate": 1.4013781569868816e-05, "loss": 0.4801, "step": 2290 }, { "epoch": 0.86, "learning_rate": 1.4007752157484835e-05, "loss": 0.5356, "step": 2291 }, { "epoch": 0.86, "learning_rate": 1.4001721008829032e-05, "loss": 0.6254, "step": 2292 }, { "epoch": 0.86, "learning_rate": 1.399568812651427e-05, "loss": 0.5906, "step": 2293 }, { "epoch": 0.87, "learning_rate": 1.3989653513154165e-05, "loss": 0.5865, "step": 2294 }, { "epoch": 0.87, "learning_rate": 1.3983617171363082e-05, "loss": 0.5626, "step": 2295 }, { "epoch": 0.87, "learning_rate": 1.3977579103756131e-05, "loss": 0.5175, "step": 2296 }, { "epoch": 0.87, "learning_rate": 1.3971539312949185e-05, "loss": 0.5927, "step": 2297 }, { "epoch": 0.87, "learning_rate": 1.3965497801558842e-05, "loss": 0.5218, "step": 2298 }, { "epoch": 0.87, "learning_rate": 1.395945457220246e-05, "loss": 0.581, "step": 2299 }, { "epoch": 0.87, "learning_rate": 1.3953409627498137e-05, "loss": 0.55, "step": 2300 }, { "epoch": 0.87, "learning_rate": 1.3947362970064717e-05, "loss": 0.6094, "step": 2301 }, { "epoch": 0.87, "learning_rate": 1.3941314602521777e-05, "loss": 0.575, "step": 2302 }, { "epoch": 0.87, "learning_rate": 1.3935264527489648e-05, "loss": 0.5969, "step": 2303 }, { "epoch": 0.87, "learning_rate": 1.3929212747589392e-05, "loss": 0.5756, "step": 2304 }, { "epoch": 0.87, "learning_rate": 1.392315926544281e-05, "loss": 0.574, "step": 2305 }, { "epoch": 0.87, "learning_rate": 1.3917104083672439e-05, "loss": 0.5716, "step": 2306 }, { "epoch": 0.87, "learning_rate": 1.391104720490156e-05, "loss": 0.5533, "step": 2307 }, { "epoch": 0.87, "learning_rate": 1.3904988631754188e-05, "loss": 0.5738, "step": 2308 }, { "epoch": 0.87, "learning_rate": 1.389892836685506e-05, "loss": 0.5632, "step": 2309 }, { "epoch": 0.87, "learning_rate": 1.389286641282966e-05, "loss": 0.532, "step": 2310 }, { "epoch": 0.87, "learning_rate": 1.3886802772304195e-05, "loss": 0.5654, "step": 2311 }, { "epoch": 0.87, "learning_rate": 1.3880737447905612e-05, "loss": 0.5115, "step": 2312 }, { "epoch": 0.87, "learning_rate": 1.3874670442261574e-05, "loss": 0.5734, "step": 2313 }, { "epoch": 0.87, "learning_rate": 1.3868601758000487e-05, "loss": 0.5646, "step": 2314 }, { "epoch": 0.87, "learning_rate": 1.3862531397751469e-05, "loss": 0.5821, "step": 2315 }, { "epoch": 0.87, "learning_rate": 1.3856459364144378e-05, "loss": 0.5482, "step": 2316 }, { "epoch": 0.87, "learning_rate": 1.3850385659809788e-05, "loss": 0.5692, "step": 2317 }, { "epoch": 0.87, "learning_rate": 1.3844310287379004e-05, "loss": 0.6313, "step": 2318 }, { "epoch": 0.87, "learning_rate": 1.3838233249484045e-05, "loss": 0.5561, "step": 2319 }, { "epoch": 0.87, "learning_rate": 1.383215454875766e-05, "loss": 0.5668, "step": 2320 }, { "epoch": 0.88, "learning_rate": 1.3826074187833307e-05, "loss": 0.5609, "step": 2321 }, { "epoch": 0.88, "learning_rate": 1.381999216934518e-05, "loss": 0.6089, "step": 2322 }, { "epoch": 0.88, "learning_rate": 1.3813908495928177e-05, "loss": 0.5952, "step": 2323 }, { "epoch": 0.88, "learning_rate": 1.380782317021792e-05, "loss": 0.5334, "step": 2324 }, { "epoch": 0.88, "learning_rate": 1.380173619485074e-05, "loss": 0.5835, "step": 2325 }, { "epoch": 0.88, "learning_rate": 1.3795647572463693e-05, "loss": 0.6293, "step": 2326 }, { "epoch": 0.88, "learning_rate": 1.3789557305694538e-05, "loss": 0.5825, "step": 2327 }, { "epoch": 0.88, "learning_rate": 1.3783465397181752e-05, "loss": 0.5498, "step": 2328 }, { "epoch": 0.88, "learning_rate": 1.3777371849564524e-05, "loss": 0.5601, "step": 2329 }, { "epoch": 0.88, "learning_rate": 1.3771276665482756e-05, "loss": 0.5897, "step": 2330 }, { "epoch": 0.88, "learning_rate": 1.3765179847577043e-05, "loss": 0.5176, "step": 2331 }, { "epoch": 0.88, "learning_rate": 1.3759081398488712e-05, "loss": 0.5981, "step": 2332 }, { "epoch": 0.88, "learning_rate": 1.3752981320859777e-05, "loss": 0.5887, "step": 2333 }, { "epoch": 0.88, "learning_rate": 1.3746879617332962e-05, "loss": 0.5734, "step": 2334 }, { "epoch": 0.88, "learning_rate": 1.3740776290551703e-05, "loss": 0.5516, "step": 2335 }, { "epoch": 0.88, "learning_rate": 1.3734671343160133e-05, "loss": 0.5768, "step": 2336 }, { "epoch": 0.88, "learning_rate": 1.3728564777803089e-05, "loss": 0.5368, "step": 2337 }, { "epoch": 0.88, "learning_rate": 1.3722456597126104e-05, "loss": 0.5813, "step": 2338 }, { "epoch": 0.88, "learning_rate": 1.371634680377542e-05, "loss": 0.6007, "step": 2339 }, { "epoch": 0.88, "learning_rate": 1.3710235400397972e-05, "loss": 0.6214, "step": 2340 }, { "epoch": 0.88, "learning_rate": 1.3704122389641388e-05, "loss": 0.6489, "step": 2341 }, { "epoch": 0.88, "learning_rate": 1.3698007774154003e-05, "loss": 0.5886, "step": 2342 }, { "epoch": 0.88, "learning_rate": 1.369189155658484e-05, "loss": 0.5877, "step": 2343 }, { "epoch": 0.88, "learning_rate": 1.368577373958362e-05, "loss": 0.6137, "step": 2344 }, { "epoch": 0.88, "learning_rate": 1.3679654325800748e-05, "loss": 0.5735, "step": 2345 }, { "epoch": 0.88, "learning_rate": 1.3673533317887336e-05, "loss": 0.5614, "step": 2346 }, { "epoch": 0.88, "learning_rate": 1.3667410718495174e-05, "loss": 0.6102, "step": 2347 }, { "epoch": 0.89, "learning_rate": 1.3661286530276744e-05, "loss": 0.5817, "step": 2348 }, { "epoch": 0.89, "learning_rate": 1.3655160755885222e-05, "loss": 0.5544, "step": 2349 }, { "epoch": 0.89, "learning_rate": 1.3649033397974459e-05, "loss": 0.568, "step": 2350 }, { "epoch": 0.89, "learning_rate": 1.3642904459199011e-05, "loss": 0.6356, "step": 2351 }, { "epoch": 0.89, "learning_rate": 1.3636773942214103e-05, "loss": 0.6031, "step": 2352 }, { "epoch": 0.89, "learning_rate": 1.3630641849675646e-05, "loss": 0.623, "step": 2353 }, { "epoch": 0.89, "learning_rate": 1.3624508184240239e-05, "loss": 0.5939, "step": 2354 }, { "epoch": 0.89, "learning_rate": 1.3618372948565162e-05, "loss": 0.5508, "step": 2355 }, { "epoch": 0.89, "learning_rate": 1.3612236145308368e-05, "loss": 0.573, "step": 2356 }, { "epoch": 0.89, "learning_rate": 1.3606097777128497e-05, "loss": 0.6177, "step": 2357 }, { "epoch": 0.89, "learning_rate": 1.3599957846684865e-05, "loss": 0.507, "step": 2358 }, { "epoch": 0.89, "learning_rate": 1.3593816356637462e-05, "loss": 0.5937, "step": 2359 }, { "epoch": 0.89, "learning_rate": 1.3587673309646956e-05, "loss": 0.5754, "step": 2360 }, { "epoch": 0.89, "learning_rate": 1.3581528708374689e-05, "loss": 0.582, "step": 2361 }, { "epoch": 0.89, "learning_rate": 1.3575382555482677e-05, "loss": 0.6099, "step": 2362 }, { "epoch": 0.89, "learning_rate": 1.3569234853633606e-05, "loss": 0.587, "step": 2363 }, { "epoch": 0.89, "learning_rate": 1.3563085605490838e-05, "loss": 0.5997, "step": 2364 }, { "epoch": 0.89, "learning_rate": 1.3556934813718398e-05, "loss": 0.6128, "step": 2365 }, { "epoch": 0.89, "learning_rate": 1.3550782480980984e-05, "loss": 0.6069, "step": 2366 }, { "epoch": 0.89, "learning_rate": 1.3544628609943964e-05, "loss": 0.6102, "step": 2367 }, { "epoch": 0.89, "learning_rate": 1.3538473203273366e-05, "loss": 0.6008, "step": 2368 }, { "epoch": 0.89, "learning_rate": 1.3532316263635887e-05, "loss": 0.5971, "step": 2369 }, { "epoch": 0.89, "learning_rate": 1.352615779369889e-05, "loss": 0.5566, "step": 2370 }, { "epoch": 0.89, "learning_rate": 1.3519997796130397e-05, "loss": 0.5601, "step": 2371 }, { "epoch": 0.89, "learning_rate": 1.3513836273599092e-05, "loss": 0.5628, "step": 2372 }, { "epoch": 0.89, "learning_rate": 1.3507673228774325e-05, "loss": 0.6285, "step": 2373 }, { "epoch": 0.9, "learning_rate": 1.3501508664326097e-05, "loss": 0.5702, "step": 2374 }, { "epoch": 0.9, "learning_rate": 1.3495342582925075e-05, "loss": 0.5839, "step": 2375 }, { "epoch": 0.9, "learning_rate": 1.3489174987242584e-05, "loss": 0.5725, "step": 2376 }, { "epoch": 0.9, "learning_rate": 1.3483005879950594e-05, "loss": 0.6014, "step": 2377 }, { "epoch": 0.9, "learning_rate": 1.347683526372174e-05, "loss": 0.5577, "step": 2378 }, { "epoch": 0.9, "learning_rate": 1.3470663141229308e-05, "loss": 0.5436, "step": 2379 }, { "epoch": 0.9, "learning_rate": 1.3464489515147239e-05, "loss": 0.6653, "step": 2380 }, { "epoch": 0.9, "learning_rate": 1.3458314388150115e-05, "loss": 0.626, "step": 2381 }, { "epoch": 0.9, "learning_rate": 1.3452137762913184e-05, "loss": 0.5475, "step": 2382 }, { "epoch": 0.9, "learning_rate": 1.3445959642112334e-05, "loss": 0.5907, "step": 2383 }, { "epoch": 0.9, "learning_rate": 1.3439780028424098e-05, "loss": 0.6151, "step": 2384 }, { "epoch": 0.9, "learning_rate": 1.3433598924525666e-05, "loss": 0.622, "step": 2385 }, { "epoch": 0.9, "learning_rate": 1.3427416333094861e-05, "loss": 0.6219, "step": 2386 }, { "epoch": 0.9, "learning_rate": 1.342123225681016e-05, "loss": 0.5867, "step": 2387 }, { "epoch": 0.9, "learning_rate": 1.341504669835068e-05, "loss": 0.6144, "step": 2388 }, { "epoch": 0.9, "learning_rate": 1.3408859660396179e-05, "loss": 0.5705, "step": 2389 }, { "epoch": 0.9, "learning_rate": 1.340267114562706e-05, "loss": 0.5622, "step": 2390 }, { "epoch": 0.9, "learning_rate": 1.3396481156724357e-05, "loss": 0.6331, "step": 2391 }, { "epoch": 0.9, "learning_rate": 1.3390289696369756e-05, "loss": 0.5918, "step": 2392 }, { "epoch": 0.9, "learning_rate": 1.338409676724557e-05, "loss": 0.5133, "step": 2393 }, { "epoch": 0.9, "learning_rate": 1.3377902372034748e-05, "loss": 0.5659, "step": 2394 }, { "epoch": 0.9, "learning_rate": 1.3371706513420884e-05, "loss": 0.6029, "step": 2395 }, { "epoch": 0.9, "learning_rate": 1.3365509194088195e-05, "loss": 0.5649, "step": 2396 }, { "epoch": 0.9, "learning_rate": 1.3359310416721533e-05, "loss": 0.6102, "step": 2397 }, { "epoch": 0.9, "learning_rate": 1.3353110184006395e-05, "loss": 0.5664, "step": 2398 }, { "epoch": 0.9, "learning_rate": 1.3346908498628888e-05, "loss": 0.5702, "step": 2399 }, { "epoch": 0.9, "learning_rate": 1.3340705363275756e-05, "loss": 0.5635, "step": 2400 }, { "epoch": 0.91, "learning_rate": 1.3334500780634383e-05, "loss": 0.5964, "step": 2401 }, { "epoch": 0.91, "learning_rate": 1.3328294753392761e-05, "loss": 0.6056, "step": 2402 }, { "epoch": 0.91, "learning_rate": 1.332208728423952e-05, "loss": 0.5293, "step": 2403 }, { "epoch": 0.91, "learning_rate": 1.3315878375863912e-05, "loss": 0.6008, "step": 2404 }, { "epoch": 0.91, "learning_rate": 1.3309668030955813e-05, "loss": 0.5563, "step": 2405 }, { "epoch": 0.91, "learning_rate": 1.3303456252205717e-05, "loss": 0.5462, "step": 2406 }, { "epoch": 0.91, "learning_rate": 1.3297243042304747e-05, "loss": 0.5956, "step": 2407 }, { "epoch": 0.91, "learning_rate": 1.3291028403944638e-05, "loss": 0.6045, "step": 2408 }, { "epoch": 0.91, "learning_rate": 1.3284812339817751e-05, "loss": 0.576, "step": 2409 }, { "epoch": 0.91, "learning_rate": 1.3278594852617056e-05, "loss": 0.5953, "step": 2410 }, { "epoch": 0.91, "learning_rate": 1.3272375945036148e-05, "loss": 0.562, "step": 2411 }, { "epoch": 0.91, "learning_rate": 1.3266155619769236e-05, "loss": 0.6477, "step": 2412 }, { "epoch": 0.91, "learning_rate": 1.325993387951114e-05, "loss": 0.5495, "step": 2413 }, { "epoch": 0.91, "learning_rate": 1.3253710726957291e-05, "loss": 0.5724, "step": 2414 }, { "epoch": 0.91, "learning_rate": 1.3247486164803738e-05, "loss": 0.5699, "step": 2415 }, { "epoch": 0.91, "learning_rate": 1.3241260195747136e-05, "loss": 0.6268, "step": 2416 }, { "epoch": 0.91, "learning_rate": 1.3235032822484755e-05, "loss": 0.5637, "step": 2417 }, { "epoch": 0.91, "learning_rate": 1.3228804047714462e-05, "loss": 0.5281, "step": 2418 }, { "epoch": 0.91, "learning_rate": 1.3222573874134748e-05, "loss": 0.5424, "step": 2419 }, { "epoch": 0.91, "learning_rate": 1.3216342304444696e-05, "loss": 0.5495, "step": 2420 }, { "epoch": 0.91, "learning_rate": 1.3210109341343997e-05, "loss": 0.5658, "step": 2421 }, { "epoch": 0.91, "learning_rate": 1.320387498753295e-05, "loss": 0.5932, "step": 2422 }, { "epoch": 0.91, "learning_rate": 1.3197639245712454e-05, "loss": 0.5455, "step": 2423 }, { "epoch": 0.91, "learning_rate": 1.3191402118584009e-05, "loss": 0.5702, "step": 2424 }, { "epoch": 0.91, "learning_rate": 1.3185163608849711e-05, "loss": 0.5251, "step": 2425 }, { "epoch": 0.91, "learning_rate": 1.3178923719212265e-05, "loss": 0.5576, "step": 2426 }, { "epoch": 0.92, "learning_rate": 1.3172682452374967e-05, "loss": 0.5059, "step": 2427 }, { "epoch": 0.92, "learning_rate": 1.3166439811041704e-05, "loss": 0.6149, "step": 2428 }, { "epoch": 0.92, "learning_rate": 1.3160195797916977e-05, "loss": 0.552, "step": 2429 }, { "epoch": 0.92, "learning_rate": 1.315395041570586e-05, "loss": 0.6191, "step": 2430 }, { "epoch": 0.92, "learning_rate": 1.3147703667114033e-05, "loss": 0.5597, "step": 2431 }, { "epoch": 0.92, "learning_rate": 1.3141455554847765e-05, "loss": 0.5372, "step": 2432 }, { "epoch": 0.92, "learning_rate": 1.3135206081613912e-05, "loss": 0.5773, "step": 2433 }, { "epoch": 0.92, "learning_rate": 1.3128955250119928e-05, "loss": 0.5324, "step": 2434 }, { "epoch": 0.92, "learning_rate": 1.3122703063073845e-05, "loss": 0.5292, "step": 2435 }, { "epoch": 0.92, "learning_rate": 1.3116449523184292e-05, "loss": 0.6121, "step": 2436 }, { "epoch": 0.92, "learning_rate": 1.3110194633160478e-05, "loss": 0.5059, "step": 2437 }, { "epoch": 0.92, "learning_rate": 1.3103938395712198e-05, "loss": 0.5887, "step": 2438 }, { "epoch": 0.92, "learning_rate": 1.3097680813549833e-05, "loss": 0.5765, "step": 2439 }, { "epoch": 0.92, "learning_rate": 1.3091421889384344e-05, "loss": 0.5748, "step": 2440 }, { "epoch": 0.92, "learning_rate": 1.3085161625927275e-05, "loss": 0.5196, "step": 2441 }, { "epoch": 0.92, "learning_rate": 1.3078900025890746e-05, "loss": 0.5553, "step": 2442 }, { "epoch": 0.92, "learning_rate": 1.3072637091987463e-05, "loss": 0.5777, "step": 2443 }, { "epoch": 0.92, "learning_rate": 1.3066372826930708e-05, "loss": 0.5712, "step": 2444 }, { "epoch": 0.92, "learning_rate": 1.3060107233434336e-05, "loss": 0.5543, "step": 2445 }, { "epoch": 0.92, "learning_rate": 1.305384031421278e-05, "loss": 0.5846, "step": 2446 }, { "epoch": 0.92, "learning_rate": 1.3047572071981045e-05, "loss": 0.5816, "step": 2447 }, { "epoch": 0.92, "learning_rate": 1.3041302509454716e-05, "loss": 0.579, "step": 2448 }, { "epoch": 0.92, "learning_rate": 1.3035031629349941e-05, "loss": 0.5728, "step": 2449 }, { "epoch": 0.92, "learning_rate": 1.3028759434383443e-05, "loss": 0.5876, "step": 2450 }, { "epoch": 0.92, "learning_rate": 1.3022485927272514e-05, "loss": 0.5661, "step": 2451 }, { "epoch": 0.92, "learning_rate": 1.3016211110735024e-05, "loss": 0.5689, "step": 2452 }, { "epoch": 0.92, "learning_rate": 1.3009934987489389e-05, "loss": 0.5915, "step": 2453 }, { "epoch": 0.93, "learning_rate": 1.3003657560254608e-05, "loss": 0.6038, "step": 2454 }, { "epoch": 0.93, "learning_rate": 1.2997378831750242e-05, "loss": 0.6232, "step": 2455 }, { "epoch": 0.93, "learning_rate": 1.2991098804696419e-05, "loss": 0.5293, "step": 2456 }, { "epoch": 0.93, "learning_rate": 1.2984817481813812e-05, "loss": 0.6138, "step": 2457 }, { "epoch": 0.93, "learning_rate": 1.2978534865823678e-05, "loss": 0.6022, "step": 2458 }, { "epoch": 0.93, "learning_rate": 1.2972250959447824e-05, "loss": 0.5522, "step": 2459 }, { "epoch": 0.93, "learning_rate": 1.2965965765408607e-05, "loss": 0.5772, "step": 2460 }, { "epoch": 0.93, "learning_rate": 1.2959679286428963e-05, "loss": 0.5209, "step": 2461 }, { "epoch": 0.93, "learning_rate": 1.2953391525232367e-05, "loss": 0.6126, "step": 2462 }, { "epoch": 0.93, "learning_rate": 1.2947102484542855e-05, "loss": 0.5516, "step": 2463 }, { "epoch": 0.93, "learning_rate": 1.2940812167085022e-05, "loss": 0.5305, "step": 2464 }, { "epoch": 0.93, "learning_rate": 1.2934520575584007e-05, "loss": 0.6494, "step": 2465 }, { "epoch": 0.93, "learning_rate": 1.2928227712765504e-05, "loss": 0.6259, "step": 2466 }, { "epoch": 0.93, "learning_rate": 1.2921933581355767e-05, "loss": 0.6664, "step": 2467 }, { "epoch": 0.93, "learning_rate": 1.2915638184081587e-05, "loss": 0.5483, "step": 2468 }, { "epoch": 0.93, "learning_rate": 1.2909341523670309e-05, "loss": 0.4826, "step": 2469 }, { "epoch": 0.93, "learning_rate": 1.2903043602849828e-05, "loss": 0.5875, "step": 2470 }, { "epoch": 0.93, "learning_rate": 1.2896744424348577e-05, "loss": 0.6608, "step": 2471 }, { "epoch": 0.93, "learning_rate": 1.289044399089554e-05, "loss": 0.6447, "step": 2472 }, { "epoch": 0.93, "learning_rate": 1.2884142305220246e-05, "loss": 0.5736, "step": 2473 }, { "epoch": 0.93, "learning_rate": 1.2877839370052763e-05, "loss": 0.5864, "step": 2474 }, { "epoch": 0.93, "learning_rate": 1.2871535188123698e-05, "loss": 0.6322, "step": 2475 }, { "epoch": 0.93, "learning_rate": 1.2865229762164203e-05, "loss": 0.5768, "step": 2476 }, { "epoch": 0.93, "learning_rate": 1.285892309490597e-05, "loss": 0.562, "step": 2477 }, { "epoch": 0.93, "learning_rate": 1.285261518908122e-05, "loss": 0.5089, "step": 2478 }, { "epoch": 0.93, "learning_rate": 1.2846306047422723e-05, "loss": 0.5171, "step": 2479 }, { "epoch": 0.94, "learning_rate": 1.2839995672663773e-05, "loss": 0.6185, "step": 2480 }, { "epoch": 0.94, "learning_rate": 1.2833684067538213e-05, "loss": 0.6968, "step": 2481 }, { "epoch": 0.94, "learning_rate": 1.2827371234780393e-05, "loss": 0.5353, "step": 2482 }, { "epoch": 0.94, "learning_rate": 1.2821057177125222e-05, "loss": 0.5857, "step": 2483 }, { "epoch": 0.94, "learning_rate": 1.2814741897308129e-05, "loss": 0.5953, "step": 2484 }, { "epoch": 0.94, "learning_rate": 1.2808425398065067e-05, "loss": 0.6211, "step": 2485 }, { "epoch": 0.94, "learning_rate": 1.2802107682132525e-05, "loss": 0.5791, "step": 2486 }, { "epoch": 0.94, "learning_rate": 1.279578875224752e-05, "loss": 0.5478, "step": 2487 }, { "epoch": 0.94, "learning_rate": 1.2789468611147586e-05, "loss": 0.5785, "step": 2488 }, { "epoch": 0.94, "learning_rate": 1.2783147261570789e-05, "loss": 0.6193, "step": 2489 }, { "epoch": 0.94, "learning_rate": 1.2776824706255719e-05, "loss": 0.5697, "step": 2490 }, { "epoch": 0.94, "learning_rate": 1.2770500947941484e-05, "loss": 0.56, "step": 2491 }, { "epoch": 0.94, "learning_rate": 1.2764175989367717e-05, "loss": 0.6023, "step": 2492 }, { "epoch": 0.94, "learning_rate": 1.2757849833274566e-05, "loss": 0.5836, "step": 2493 }, { "epoch": 0.94, "learning_rate": 1.2751522482402706e-05, "loss": 0.6052, "step": 2494 }, { "epoch": 0.94, "learning_rate": 1.274519393949332e-05, "loss": 0.5689, "step": 2495 }, { "epoch": 0.94, "learning_rate": 1.2738864207288113e-05, "loss": 0.6294, "step": 2496 }, { "epoch": 0.94, "learning_rate": 1.2732533288529308e-05, "loss": 0.5611, "step": 2497 }, { "epoch": 0.94, "learning_rate": 1.2726201185959634e-05, "loss": 0.5515, "step": 2498 }, { "epoch": 0.94, "learning_rate": 1.271986790232234e-05, "loss": 0.5909, "step": 2499 }, { "epoch": 0.94, "learning_rate": 1.2713533440361184e-05, "loss": 0.6242, "step": 2500 }, { "epoch": 0.94, "learning_rate": 1.270719780282043e-05, "loss": 0.5704, "step": 2501 }, { "epoch": 0.94, "learning_rate": 1.2700860992444865e-05, "loss": 0.5552, "step": 2502 }, { "epoch": 0.94, "learning_rate": 1.2694523011979766e-05, "loss": 0.5703, "step": 2503 }, { "epoch": 0.94, "learning_rate": 1.2688183864170931e-05, "loss": 0.5773, "step": 2504 }, { "epoch": 0.94, "learning_rate": 1.2681843551764655e-05, "loss": 0.5303, "step": 2505 }, { "epoch": 0.94, "learning_rate": 1.2675502077507744e-05, "loss": 0.5387, "step": 2506 }, { "epoch": 0.95, "learning_rate": 1.2669159444147501e-05, "loss": 0.6081, "step": 2507 }, { "epoch": 0.95, "learning_rate": 1.2662815654431739e-05, "loss": 0.5907, "step": 2508 }, { "epoch": 0.95, "learning_rate": 1.2656470711108763e-05, "loss": 0.5526, "step": 2509 }, { "epoch": 0.95, "learning_rate": 1.2650124616927386e-05, "loss": 0.569, "step": 2510 }, { "epoch": 0.95, "learning_rate": 1.2643777374636914e-05, "loss": 0.5492, "step": 2511 }, { "epoch": 0.95, "learning_rate": 1.2637428986987153e-05, "loss": 0.502, "step": 2512 }, { "epoch": 0.95, "learning_rate": 1.2631079456728403e-05, "loss": 0.526, "step": 2513 }, { "epoch": 0.95, "learning_rate": 1.2624728786611459e-05, "loss": 0.5907, "step": 2514 }, { "epoch": 0.95, "learning_rate": 1.2618376979387619e-05, "loss": 0.5758, "step": 2515 }, { "epoch": 0.95, "learning_rate": 1.2612024037808657e-05, "loss": 0.6028, "step": 2516 }, { "epoch": 0.95, "learning_rate": 1.2605669964626852e-05, "loss": 0.5653, "step": 2517 }, { "epoch": 0.95, "learning_rate": 1.2599314762594965e-05, "loss": 0.6337, "step": 2518 }, { "epoch": 0.95, "learning_rate": 1.2592958434466256e-05, "loss": 0.5989, "step": 2519 }, { "epoch": 0.95, "learning_rate": 1.2586600982994458e-05, "loss": 0.6094, "step": 2520 }, { "epoch": 0.95, "learning_rate": 1.258024241093381e-05, "loss": 0.6108, "step": 2521 }, { "epoch": 0.95, "learning_rate": 1.2573882721039015e-05, "loss": 0.576, "step": 2522 }, { "epoch": 0.95, "learning_rate": 1.2567521916065276e-05, "loss": 0.6675, "step": 2523 }, { "epoch": 0.95, "learning_rate": 1.2561159998768275e-05, "loss": 0.6069, "step": 2524 }, { "epoch": 0.95, "learning_rate": 1.2554796971904174e-05, "loss": 0.5548, "step": 2525 }, { "epoch": 0.95, "learning_rate": 1.254843283822961e-05, "loss": 0.5891, "step": 2526 }, { "epoch": 0.95, "learning_rate": 1.2542067600501717e-05, "loss": 0.5416, "step": 2527 }, { "epoch": 0.95, "learning_rate": 1.253570126147809e-05, "loss": 0.5703, "step": 2528 }, { "epoch": 0.95, "learning_rate": 1.2529333823916807e-05, "loss": 0.5784, "step": 2529 }, { "epoch": 0.95, "learning_rate": 1.2522965290576427e-05, "loss": 0.5434, "step": 2530 }, { "epoch": 0.95, "learning_rate": 1.2516595664215977e-05, "loss": 0.5738, "step": 2531 }, { "epoch": 0.95, "learning_rate": 1.2510224947594955e-05, "loss": 0.6199, "step": 2532 }, { "epoch": 0.96, "learning_rate": 1.2503853143473346e-05, "loss": 0.6198, "step": 2533 }, { "epoch": 0.96, "learning_rate": 1.2497480254611584e-05, "loss": 0.533, "step": 2534 }, { "epoch": 0.96, "learning_rate": 1.2491106283770596e-05, "loss": 0.5367, "step": 2535 }, { "epoch": 0.96, "learning_rate": 1.248473123371176e-05, "loss": 0.5497, "step": 2536 }, { "epoch": 0.96, "learning_rate": 1.2478355107196931e-05, "loss": 0.5728, "step": 2537 }, { "epoch": 0.96, "learning_rate": 1.2471977906988427e-05, "loss": 0.5577, "step": 2538 }, { "epoch": 0.96, "learning_rate": 1.2465599635849031e-05, "loss": 0.5796, "step": 2539 }, { "epoch": 0.96, "learning_rate": 1.2459220296541994e-05, "loss": 0.5973, "step": 2540 }, { "epoch": 0.96, "learning_rate": 1.2452839891831021e-05, "loss": 0.5857, "step": 2541 }, { "epoch": 0.96, "learning_rate": 1.244645842448029e-05, "loss": 0.5822, "step": 2542 }, { "epoch": 0.96, "learning_rate": 1.2440075897254432e-05, "loss": 0.5776, "step": 2543 }, { "epoch": 0.96, "learning_rate": 1.2433692312918537e-05, "loss": 0.6104, "step": 2544 }, { "epoch": 0.96, "learning_rate": 1.2427307674238156e-05, "loss": 0.5739, "step": 2545 }, { "epoch": 0.96, "learning_rate": 1.2420921983979294e-05, "loss": 0.5386, "step": 2546 }, { "epoch": 0.96, "learning_rate": 1.2414535244908415e-05, "loss": 0.5708, "step": 2547 }, { "epoch": 0.96, "learning_rate": 1.240814745979243e-05, "loss": 0.6084, "step": 2548 }, { "epoch": 0.96, "learning_rate": 1.2401758631398719e-05, "loss": 0.6142, "step": 2549 }, { "epoch": 0.96, "learning_rate": 1.2395368762495094e-05, "loss": 0.5486, "step": 2550 }, { "epoch": 0.96, "learning_rate": 1.2388977855849828e-05, "loss": 0.5735, "step": 2551 }, { "epoch": 0.96, "learning_rate": 1.238258591423165e-05, "loss": 0.6641, "step": 2552 }, { "epoch": 0.96, "learning_rate": 1.2376192940409729e-05, "loss": 0.5672, "step": 2553 }, { "epoch": 0.96, "learning_rate": 1.2369798937153673e-05, "loss": 0.5765, "step": 2554 }, { "epoch": 0.96, "learning_rate": 1.2363403907233553e-05, "loss": 0.5196, "step": 2555 }, { "epoch": 0.96, "learning_rate": 1.235700785341988e-05, "loss": 0.6066, "step": 2556 }, { "epoch": 0.96, "learning_rate": 1.2350610778483598e-05, "loss": 0.5914, "step": 2557 }, { "epoch": 0.96, "learning_rate": 1.2344212685196104e-05, "loss": 0.5654, "step": 2558 }, { "epoch": 0.96, "learning_rate": 1.2337813576329237e-05, "loss": 0.5427, "step": 2559 }, { "epoch": 0.97, "learning_rate": 1.233141345465527e-05, "loss": 0.5711, "step": 2560 }, { "epoch": 0.97, "learning_rate": 1.2325012322946913e-05, "loss": 0.6035, "step": 2561 }, { "epoch": 0.97, "learning_rate": 1.2318610183977321e-05, "loss": 0.5386, "step": 2562 }, { "epoch": 0.97, "learning_rate": 1.2312207040520087e-05, "loss": 0.5928, "step": 2563 }, { "epoch": 0.97, "learning_rate": 1.2305802895349228e-05, "loss": 0.5575, "step": 2564 }, { "epoch": 0.97, "learning_rate": 1.2299397751239204e-05, "loss": 0.5739, "step": 2565 }, { "epoch": 0.97, "learning_rate": 1.2292991610964902e-05, "loss": 0.5546, "step": 2566 }, { "epoch": 0.97, "learning_rate": 1.2286584477301646e-05, "loss": 0.5182, "step": 2567 }, { "epoch": 0.97, "learning_rate": 1.2280176353025186e-05, "loss": 0.5234, "step": 2568 }, { "epoch": 0.97, "learning_rate": 1.2273767240911707e-05, "loss": 0.5505, "step": 2569 }, { "epoch": 0.97, "learning_rate": 1.2267357143737813e-05, "loss": 0.6338, "step": 2570 }, { "epoch": 0.97, "learning_rate": 1.2260946064280543e-05, "loss": 0.5119, "step": 2571 }, { "epoch": 0.97, "learning_rate": 1.2254534005317358e-05, "loss": 0.5915, "step": 2572 }, { "epoch": 0.97, "learning_rate": 1.224812096962614e-05, "loss": 0.556, "step": 2573 }, { "epoch": 0.97, "learning_rate": 1.2241706959985207e-05, "loss": 0.5333, "step": 2574 }, { "epoch": 0.97, "learning_rate": 1.2235291979173279e-05, "loss": 0.6282, "step": 2575 }, { "epoch": 0.97, "learning_rate": 1.2228876029969511e-05, "loss": 0.5689, "step": 2576 }, { "epoch": 0.97, "learning_rate": 1.2222459115153478e-05, "loss": 0.5399, "step": 2577 }, { "epoch": 0.97, "learning_rate": 1.2216041237505167e-05, "loss": 0.6027, "step": 2578 }, { "epoch": 0.97, "learning_rate": 1.2209622399804983e-05, "loss": 0.5505, "step": 2579 }, { "epoch": 0.97, "learning_rate": 1.2203202604833748e-05, "loss": 0.614, "step": 2580 }, { "epoch": 0.97, "learning_rate": 1.2196781855372703e-05, "loss": 0.5637, "step": 2581 }, { "epoch": 0.97, "learning_rate": 1.2190360154203498e-05, "loss": 0.5487, "step": 2582 }, { "epoch": 0.97, "learning_rate": 1.2183937504108189e-05, "loss": 0.5991, "step": 2583 }, { "epoch": 0.97, "learning_rate": 1.2177513907869261e-05, "loss": 0.6162, "step": 2584 }, { "epoch": 0.97, "learning_rate": 1.2171089368269591e-05, "loss": 0.5735, "step": 2585 }, { "epoch": 0.98, "learning_rate": 1.2164663888092469e-05, "loss": 0.5933, "step": 2586 }, { "epoch": 0.98, "learning_rate": 1.2158237470121601e-05, "loss": 0.5414, "step": 2587 }, { "epoch": 0.98, "learning_rate": 1.2151810117141092e-05, "loss": 0.566, "step": 2588 }, { "epoch": 0.98, "learning_rate": 1.2145381831935454e-05, "loss": 0.5739, "step": 2589 }, { "epoch": 0.98, "learning_rate": 1.21389526172896e-05, "loss": 0.5301, "step": 2590 }, { "epoch": 0.98, "learning_rate": 1.2132522475988848e-05, "loss": 0.5697, "step": 2591 }, { "epoch": 0.98, "learning_rate": 1.212609141081892e-05, "loss": 0.5597, "step": 2592 }, { "epoch": 0.98, "learning_rate": 1.2119659424565935e-05, "loss": 0.5544, "step": 2593 }, { "epoch": 0.98, "learning_rate": 1.2113226520016412e-05, "loss": 0.5163, "step": 2594 }, { "epoch": 0.98, "learning_rate": 1.2106792699957264e-05, "loss": 0.5525, "step": 2595 }, { "epoch": 0.98, "learning_rate": 1.2100357967175808e-05, "loss": 0.5833, "step": 2596 }, { "epoch": 0.98, "learning_rate": 1.2093922324459753e-05, "loss": 0.572, "step": 2597 }, { "epoch": 0.98, "learning_rate": 1.2087485774597197e-05, "loss": 0.6213, "step": 2598 }, { "epoch": 0.98, "learning_rate": 1.2081048320376644e-05, "loss": 0.6117, "step": 2599 }, { "epoch": 0.98, "learning_rate": 1.2074609964586974e-05, "loss": 0.6131, "step": 2600 }, { "epoch": 0.98, "learning_rate": 1.2068170710017469e-05, "loss": 0.5837, "step": 2601 }, { "epoch": 0.98, "learning_rate": 1.2061730559457793e-05, "loss": 0.5744, "step": 2602 }, { "epoch": 0.98, "learning_rate": 1.2055289515698008e-05, "loss": 0.5663, "step": 2603 }, { "epoch": 0.98, "learning_rate": 1.2048847581528551e-05, "loss": 0.5375, "step": 2604 }, { "epoch": 0.98, "learning_rate": 1.2042404759740252e-05, "loss": 0.6007, "step": 2605 }, { "epoch": 0.98, "learning_rate": 1.2035961053124325e-05, "loss": 0.521, "step": 2606 }, { "epoch": 0.98, "learning_rate": 1.2029516464472368e-05, "loss": 0.4965, "step": 2607 }, { "epoch": 0.98, "learning_rate": 1.2023070996576353e-05, "loss": 0.5548, "step": 2608 }, { "epoch": 0.98, "learning_rate": 1.2016624652228648e-05, "loss": 0.5781, "step": 2609 }, { "epoch": 0.98, "learning_rate": 1.2010177434221989e-05, "loss": 0.5512, "step": 2610 }, { "epoch": 0.98, "learning_rate": 1.2003729345349486e-05, "loss": 0.5161, "step": 2611 }, { "epoch": 0.98, "learning_rate": 1.1997280388404644e-05, "loss": 0.6343, "step": 2612 }, { "epoch": 0.99, "learning_rate": 1.199083056618133e-05, "loss": 0.5749, "step": 2613 }, { "epoch": 0.99, "learning_rate": 1.1984379881473789e-05, "loss": 0.6388, "step": 2614 }, { "epoch": 0.99, "learning_rate": 1.1977928337076644e-05, "loss": 0.593, "step": 2615 }, { "epoch": 0.99, "learning_rate": 1.197147593578488e-05, "loss": 0.5335, "step": 2616 }, { "epoch": 0.99, "learning_rate": 1.1965022680393867e-05, "loss": 0.5357, "step": 2617 }, { "epoch": 0.99, "learning_rate": 1.1958568573699337e-05, "loss": 0.5791, "step": 2618 }, { "epoch": 0.99, "learning_rate": 1.1952113618497387e-05, "loss": 0.5734, "step": 2619 }, { "epoch": 0.99, "learning_rate": 1.194565781758449e-05, "loss": 0.584, "step": 2620 }, { "epoch": 0.99, "learning_rate": 1.1939201173757486e-05, "loss": 0.543, "step": 2621 }, { "epoch": 0.99, "learning_rate": 1.1932743689813569e-05, "loss": 0.5214, "step": 2622 }, { "epoch": 0.99, "learning_rate": 1.1926285368550307e-05, "loss": 0.6195, "step": 2623 }, { "epoch": 0.99, "learning_rate": 1.191982621276563e-05, "loss": 0.5659, "step": 2624 }, { "epoch": 0.99, "learning_rate": 1.1913366225257823e-05, "loss": 0.6098, "step": 2625 }, { "epoch": 0.99, "learning_rate": 1.1906905408825539e-05, "loss": 0.5742, "step": 2626 }, { "epoch": 0.99, "learning_rate": 1.1900443766267785e-05, "loss": 0.5696, "step": 2627 }, { "epoch": 0.99, "learning_rate": 1.189398130038393e-05, "loss": 0.5997, "step": 2628 }, { "epoch": 0.99, "learning_rate": 1.1887518013973694e-05, "loss": 0.5726, "step": 2629 }, { "epoch": 0.99, "learning_rate": 1.1881053909837157e-05, "loss": 0.5837, "step": 2630 }, { "epoch": 0.99, "learning_rate": 1.1874588990774754e-05, "loss": 0.5571, "step": 2631 }, { "epoch": 0.99, "learning_rate": 1.1868123259587272e-05, "loss": 0.5911, "step": 2632 }, { "epoch": 0.99, "learning_rate": 1.1861656719075842e-05, "loss": 0.584, "step": 2633 }, { "epoch": 0.99, "learning_rate": 1.1855189372041962e-05, "loss": 0.6112, "step": 2634 }, { "epoch": 0.99, "learning_rate": 1.1848721221287462e-05, "loss": 0.5807, "step": 2635 }, { "epoch": 0.99, "learning_rate": 1.1842252269614536e-05, "loss": 0.5935, "step": 2636 }, { "epoch": 0.99, "learning_rate": 1.183578251982571e-05, "loss": 0.5528, "step": 2637 }, { "epoch": 0.99, "learning_rate": 1.1829311974723868e-05, "loss": 0.5632, "step": 2638 }, { "epoch": 1.0, "learning_rate": 1.1822840637112232e-05, "loss": 0.5971, "step": 2639 }, { "epoch": 1.0, "learning_rate": 1.1816368509794365e-05, "loss": 0.5587, "step": 2640 }, { "epoch": 1.0, "learning_rate": 1.1809895595574181e-05, "loss": 0.5704, "step": 2641 }, { "epoch": 1.0, "learning_rate": 1.1803421897255931e-05, "loss": 0.5821, "step": 2642 }, { "epoch": 1.0, "learning_rate": 1.1796947417644198e-05, "loss": 0.5402, "step": 2643 }, { "epoch": 1.0, "learning_rate": 1.1790472159543917e-05, "loss": 0.6167, "step": 2644 }, { "epoch": 1.0, "learning_rate": 1.1783996125760349e-05, "loss": 0.6026, "step": 2645 }, { "epoch": 1.0, "learning_rate": 1.1777519319099096e-05, "loss": 0.5429, "step": 2646 }, { "epoch": 1.0, "learning_rate": 1.1771041742366096e-05, "loss": 0.5786, "step": 2647 }, { "epoch": 1.0, "learning_rate": 1.1764563398367616e-05, "loss": 0.6392, "step": 2648 }, { "epoch": 1.0, "learning_rate": 1.1758084289910262e-05, "loss": 0.522, "step": 2649 }, { "epoch": 1.0, "learning_rate": 1.1751604419800966e-05, "loss": 0.6011, "step": 2650 }, { "epoch": 1.0, "learning_rate": 1.1745123790846989e-05, "loss": 0.5617, "step": 2651 }, { "epoch": 1.0, "learning_rate": 1.1738642405855923e-05, "loss": 0.6272, "step": 2652 }, { "epoch": 1.0, "learning_rate": 1.1732160267635696e-05, "loss": 0.5158, "step": 2653 }, { "epoch": 1.0, "learning_rate": 1.1725677378994542e-05, "loss": 0.5674, "step": 2654 }, { "epoch": 1.0, "learning_rate": 1.1719193742741037e-05, "loss": 0.5954, "step": 2655 }, { "epoch": 1.0, "learning_rate": 1.1712709361684078e-05, "loss": 0.5556, "step": 2656 }, { "epoch": 1.0, "learning_rate": 1.1706224238632883e-05, "loss": 0.5631, "step": 2657 }, { "epoch": 1.0, "learning_rate": 1.1699738376396988e-05, "loss": 0.5303, "step": 2658 }, { "epoch": 1.0, "learning_rate": 1.1693251777786253e-05, "loss": 0.5444, "step": 2659 }, { "epoch": 1.0, "learning_rate": 1.1686764445610862e-05, "loss": 0.5666, "step": 2660 }, { "epoch": 1.0, "learning_rate": 1.1680276382681302e-05, "loss": 0.5452, "step": 2661 }, { "epoch": 1.0, "learning_rate": 1.1673787591808389e-05, "loss": 0.5161, "step": 2662 }, { "epoch": 1.0, "learning_rate": 1.1667298075803256e-05, "loss": 0.5925, "step": 2663 }, { "epoch": 1.0, "learning_rate": 1.166080783747734e-05, "loss": 0.5399, "step": 2664 }, { "epoch": 1.0, "learning_rate": 1.1654316879642396e-05, "loss": 0.5629, "step": 2665 }, { "epoch": 1.01, "learning_rate": 1.1647825205110492e-05, "loss": 0.5416, "step": 2666 }, { "epoch": 1.01, "learning_rate": 1.1641332816694008e-05, "loss": 0.5407, "step": 2667 }, { "epoch": 1.01, "learning_rate": 1.1634839717205628e-05, "loss": 0.5709, "step": 2668 }, { "epoch": 1.01, "learning_rate": 1.1628345909458347e-05, "loss": 0.511, "step": 2669 }, { "epoch": 1.01, "learning_rate": 1.162185139626547e-05, "loss": 0.5878, "step": 2670 }, { "epoch": 1.01, "learning_rate": 1.1615356180440598e-05, "loss": 0.5476, "step": 2671 }, { "epoch": 1.01, "learning_rate": 1.1608860264797645e-05, "loss": 0.5778, "step": 2672 }, { "epoch": 1.01, "learning_rate": 1.1602363652150827e-05, "loss": 0.5247, "step": 2673 }, { "epoch": 1.01, "learning_rate": 1.1595866345314659e-05, "loss": 0.5917, "step": 2674 }, { "epoch": 1.01, "learning_rate": 1.1589368347103961e-05, "loss": 0.5187, "step": 2675 }, { "epoch": 1.01, "learning_rate": 1.158286966033385e-05, "loss": 0.5774, "step": 2676 }, { "epoch": 1.01, "learning_rate": 1.1576370287819737e-05, "loss": 0.6178, "step": 2677 }, { "epoch": 1.01, "learning_rate": 1.1569870232377338e-05, "loss": 0.5715, "step": 2678 }, { "epoch": 1.01, "learning_rate": 1.1563369496822664e-05, "loss": 0.5998, "step": 2679 }, { "epoch": 1.01, "learning_rate": 1.155686808397201e-05, "loss": 0.5851, "step": 2680 }, { "epoch": 1.01, "learning_rate": 1.155036599664198e-05, "loss": 0.5919, "step": 2681 }, { "epoch": 1.01, "learning_rate": 1.154386323764946e-05, "loss": 0.6343, "step": 2682 }, { "epoch": 1.01, "learning_rate": 1.1537359809811623e-05, "loss": 0.5316, "step": 2683 }, { "epoch": 1.01, "learning_rate": 1.1530855715945947e-05, "loss": 0.5542, "step": 2684 }, { "epoch": 1.01, "learning_rate": 1.152435095887019e-05, "loss": 0.5413, "step": 2685 }, { "epoch": 1.01, "learning_rate": 1.1517845541402392e-05, "loss": 0.5548, "step": 2686 }, { "epoch": 1.01, "learning_rate": 1.1511339466360885e-05, "loss": 0.5273, "step": 2687 }, { "epoch": 1.01, "learning_rate": 1.1504832736564287e-05, "loss": 0.5459, "step": 2688 }, { "epoch": 1.01, "learning_rate": 1.1498325354831495e-05, "loss": 0.5463, "step": 2689 }, { "epoch": 1.01, "learning_rate": 1.1491817323981695e-05, "loss": 0.544, "step": 2690 }, { "epoch": 1.01, "learning_rate": 1.1485308646834347e-05, "loss": 0.5418, "step": 2691 }, { "epoch": 1.02, "learning_rate": 1.14787993262092e-05, "loss": 0.5908, "step": 2692 }, { "epoch": 1.02, "learning_rate": 1.147228936492627e-05, "loss": 0.6089, "step": 2693 }, { "epoch": 1.02, "learning_rate": 1.146577876580586e-05, "loss": 0.5337, "step": 2694 }, { "epoch": 1.02, "learning_rate": 1.1459267531668543e-05, "loss": 0.5604, "step": 2695 }, { "epoch": 1.02, "learning_rate": 1.1452755665335173e-05, "loss": 0.5714, "step": 2696 }, { "epoch": 1.02, "learning_rate": 1.1446243169626874e-05, "loss": 0.5617, "step": 2697 }, { "epoch": 1.02, "learning_rate": 1.1439730047365046e-05, "loss": 0.5125, "step": 2698 }, { "epoch": 1.02, "learning_rate": 1.1433216301371353e-05, "loss": 0.5709, "step": 2699 }, { "epoch": 1.02, "learning_rate": 1.1426701934467742e-05, "loss": 0.5812, "step": 2700 }, { "epoch": 1.02, "learning_rate": 1.1420186949476413e-05, "loss": 0.552, "step": 2701 }, { "epoch": 1.02, "learning_rate": 1.1413671349219844e-05, "loss": 0.538, "step": 2702 }, { "epoch": 1.02, "learning_rate": 1.1407155136520783e-05, "loss": 0.5703, "step": 2703 }, { "epoch": 1.02, "learning_rate": 1.1400638314202236e-05, "loss": 0.5168, "step": 2704 }, { "epoch": 1.02, "learning_rate": 1.1394120885087472e-05, "loss": 0.5965, "step": 2705 }, { "epoch": 1.02, "learning_rate": 1.1387602852000028e-05, "loss": 0.5881, "step": 2706 }, { "epoch": 1.02, "learning_rate": 1.13810842177637e-05, "loss": 0.5037, "step": 2707 }, { "epoch": 1.02, "learning_rate": 1.137456498520255e-05, "loss": 0.5126, "step": 2708 }, { "epoch": 1.02, "learning_rate": 1.1368045157140885e-05, "loss": 0.5551, "step": 2709 }, { "epoch": 1.02, "learning_rate": 1.1361524736403287e-05, "loss": 0.5543, "step": 2710 }, { "epoch": 1.02, "learning_rate": 1.1355003725814584e-05, "loss": 0.5534, "step": 2711 }, { "epoch": 1.02, "learning_rate": 1.134848212819986e-05, "loss": 0.5681, "step": 2712 }, { "epoch": 1.02, "learning_rate": 1.1341959946384463e-05, "loss": 0.5652, "step": 2713 }, { "epoch": 1.02, "learning_rate": 1.133543718319398e-05, "loss": 0.5044, "step": 2714 }, { "epoch": 1.02, "learning_rate": 1.1328913841454261e-05, "loss": 0.529, "step": 2715 }, { "epoch": 1.02, "learning_rate": 1.1322389923991403e-05, "loss": 0.5554, "step": 2716 }, { "epoch": 1.02, "learning_rate": 1.1315865433631748e-05, "loss": 0.5435, "step": 2717 }, { "epoch": 1.02, "learning_rate": 1.1309340373201893e-05, "loss": 0.5257, "step": 2718 }, { "epoch": 1.03, "learning_rate": 1.1302814745528679e-05, "loss": 0.5714, "step": 2719 }, { "epoch": 1.03, "learning_rate": 1.1296288553439193e-05, "loss": 0.5082, "step": 2720 }, { "epoch": 1.03, "learning_rate": 1.1289761799760763e-05, "loss": 0.5613, "step": 2721 }, { "epoch": 1.03, "learning_rate": 1.128323448732097e-05, "loss": 0.5958, "step": 2722 }, { "epoch": 1.03, "learning_rate": 1.1276706618947627e-05, "loss": 0.5939, "step": 2723 }, { "epoch": 1.03, "learning_rate": 1.1270178197468788e-05, "loss": 0.5455, "step": 2724 }, { "epoch": 1.03, "learning_rate": 1.1263649225712758e-05, "loss": 0.5109, "step": 2725 }, { "epoch": 1.03, "learning_rate": 1.1257119706508068e-05, "loss": 0.5945, "step": 2726 }, { "epoch": 1.03, "learning_rate": 1.125058964268349e-05, "loss": 0.5653, "step": 2727 }, { "epoch": 1.03, "learning_rate": 1.1244059037068033e-05, "loss": 0.623, "step": 2728 }, { "epoch": 1.03, "learning_rate": 1.1237527892490945e-05, "loss": 0.6107, "step": 2729 }, { "epoch": 1.03, "learning_rate": 1.1230996211781697e-05, "loss": 0.5784, "step": 2730 }, { "epoch": 1.03, "learning_rate": 1.1224463997770002e-05, "loss": 0.5256, "step": 2731 }, { "epoch": 1.03, "learning_rate": 1.1217931253285797e-05, "loss": 0.5813, "step": 2732 }, { "epoch": 1.03, "learning_rate": 1.1211397981159258e-05, "loss": 0.5365, "step": 2733 }, { "epoch": 1.03, "learning_rate": 1.1204864184220775e-05, "loss": 0.4908, "step": 2734 }, { "epoch": 1.03, "learning_rate": 1.1198329865300982e-05, "loss": 0.5262, "step": 2735 }, { "epoch": 1.03, "learning_rate": 1.1191795027230727e-05, "loss": 0.554, "step": 2736 }, { "epoch": 1.03, "learning_rate": 1.1185259672841086e-05, "loss": 0.5341, "step": 2737 }, { "epoch": 1.03, "learning_rate": 1.1178723804963365e-05, "loss": 0.5339, "step": 2738 }, { "epoch": 1.03, "learning_rate": 1.1172187426429082e-05, "loss": 0.5358, "step": 2739 }, { "epoch": 1.03, "learning_rate": 1.1165650540069983e-05, "loss": 0.5678, "step": 2740 }, { "epoch": 1.03, "learning_rate": 1.1159113148718035e-05, "loss": 0.5283, "step": 2741 }, { "epoch": 1.03, "learning_rate": 1.1152575255205418e-05, "loss": 0.5602, "step": 2742 }, { "epoch": 1.03, "learning_rate": 1.1146036862364533e-05, "loss": 0.4731, "step": 2743 }, { "epoch": 1.03, "learning_rate": 1.1139497973027998e-05, "loss": 0.5715, "step": 2744 }, { "epoch": 1.04, "learning_rate": 1.1132958590028645e-05, "loss": 0.5408, "step": 2745 }, { "epoch": 1.04, "learning_rate": 1.1126418716199518e-05, "loss": 0.5662, "step": 2746 }, { "epoch": 1.04, "learning_rate": 1.1119878354373884e-05, "loss": 0.5334, "step": 2747 }, { "epoch": 1.04, "learning_rate": 1.11133375073852e-05, "loss": 0.5384, "step": 2748 }, { "epoch": 1.04, "learning_rate": 1.1106796178067158e-05, "loss": 0.5926, "step": 2749 }, { "epoch": 1.04, "learning_rate": 1.1100254369253641e-05, "loss": 0.5631, "step": 2750 }, { "epoch": 1.04, "learning_rate": 1.1093712083778748e-05, "loss": 0.5752, "step": 2751 }, { "epoch": 1.04, "learning_rate": 1.1087169324476778e-05, "loss": 0.5372, "step": 2752 }, { "epoch": 1.04, "learning_rate": 1.1080626094182245e-05, "loss": 0.5507, "step": 2753 }, { "epoch": 1.04, "learning_rate": 1.1074082395729866e-05, "loss": 0.5914, "step": 2754 }, { "epoch": 1.04, "learning_rate": 1.1067538231954546e-05, "loss": 0.5764, "step": 2755 }, { "epoch": 1.04, "learning_rate": 1.1060993605691408e-05, "loss": 0.5888, "step": 2756 }, { "epoch": 1.04, "learning_rate": 1.105444851977577e-05, "loss": 0.5737, "step": 2757 }, { "epoch": 1.04, "learning_rate": 1.1047902977043149e-05, "loss": 0.5243, "step": 2758 }, { "epoch": 1.04, "learning_rate": 1.1041356980329254e-05, "loss": 0.588, "step": 2759 }, { "epoch": 1.04, "learning_rate": 1.1034810532470004e-05, "loss": 0.5786, "step": 2760 }, { "epoch": 1.04, "learning_rate": 1.1028263636301502e-05, "loss": 0.5206, "step": 2761 }, { "epoch": 1.04, "learning_rate": 1.102171629466005e-05, "loss": 0.5887, "step": 2762 }, { "epoch": 1.04, "learning_rate": 1.101516851038214e-05, "loss": 0.5939, "step": 2763 }, { "epoch": 1.04, "learning_rate": 1.100862028630446e-05, "loss": 0.5333, "step": 2764 }, { "epoch": 1.04, "learning_rate": 1.1002071625263884e-05, "loss": 0.5628, "step": 2765 }, { "epoch": 1.04, "learning_rate": 1.099552253009748e-05, "loss": 0.5404, "step": 2766 }, { "epoch": 1.04, "learning_rate": 1.09889730036425e-05, "loss": 0.5664, "step": 2767 }, { "epoch": 1.04, "learning_rate": 1.0982423048736383e-05, "loss": 0.5944, "step": 2768 }, { "epoch": 1.04, "learning_rate": 1.097587266821676e-05, "loss": 0.5408, "step": 2769 }, { "epoch": 1.04, "learning_rate": 1.0969321864921439e-05, "loss": 0.5858, "step": 2770 }, { "epoch": 1.04, "learning_rate": 1.0962770641688412e-05, "loss": 0.4875, "step": 2771 }, { "epoch": 1.05, "learning_rate": 1.0956219001355856e-05, "loss": 0.5436, "step": 2772 }, { "epoch": 1.05, "learning_rate": 1.0949666946762127e-05, "loss": 0.5393, "step": 2773 }, { "epoch": 1.05, "learning_rate": 1.0943114480745762e-05, "loss": 0.5205, "step": 2774 }, { "epoch": 1.05, "learning_rate": 1.093656160614547e-05, "loss": 0.5074, "step": 2775 }, { "epoch": 1.05, "learning_rate": 1.0930008325800151e-05, "loss": 0.5243, "step": 2776 }, { "epoch": 1.05, "learning_rate": 1.0923454642548866e-05, "loss": 0.51, "step": 2777 }, { "epoch": 1.05, "learning_rate": 1.0916900559230853e-05, "loss": 0.5212, "step": 2778 }, { "epoch": 1.05, "learning_rate": 1.0910346078685534e-05, "loss": 0.5501, "step": 2779 }, { "epoch": 1.05, "learning_rate": 1.0903791203752492e-05, "loss": 0.5198, "step": 2780 }, { "epoch": 1.05, "learning_rate": 1.0897235937271482e-05, "loss": 0.5821, "step": 2781 }, { "epoch": 1.05, "learning_rate": 1.0890680282082435e-05, "loss": 0.5153, "step": 2782 }, { "epoch": 1.05, "learning_rate": 1.0884124241025451e-05, "loss": 0.5144, "step": 2783 }, { "epoch": 1.05, "learning_rate": 1.087756781694078e-05, "loss": 0.536, "step": 2784 }, { "epoch": 1.05, "learning_rate": 1.0871011012668863e-05, "loss": 0.5478, "step": 2785 }, { "epoch": 1.05, "learning_rate": 1.086445383105029e-05, "loss": 0.5878, "step": 2786 }, { "epoch": 1.05, "learning_rate": 1.0857896274925814e-05, "loss": 0.5672, "step": 2787 }, { "epoch": 1.05, "learning_rate": 1.0851338347136358e-05, "loss": 0.5794, "step": 2788 }, { "epoch": 1.05, "learning_rate": 1.0844780050523e-05, "loss": 0.5444, "step": 2789 }, { "epoch": 1.05, "learning_rate": 1.0838221387926983e-05, "loss": 0.5232, "step": 2790 }, { "epoch": 1.05, "learning_rate": 1.0831662362189704e-05, "loss": 0.5173, "step": 2791 }, { "epoch": 1.05, "learning_rate": 1.0825102976152715e-05, "loss": 0.5475, "step": 2792 }, { "epoch": 1.05, "learning_rate": 1.0818543232657734e-05, "loss": 0.6048, "step": 2793 }, { "epoch": 1.05, "learning_rate": 1.0811983134546624e-05, "loss": 0.5301, "step": 2794 }, { "epoch": 1.05, "learning_rate": 1.0805422684661409e-05, "loss": 0.5187, "step": 2795 }, { "epoch": 1.05, "learning_rate": 1.0798861885844257e-05, "loss": 0.5459, "step": 2796 }, { "epoch": 1.05, "learning_rate": 1.0792300740937493e-05, "loss": 0.5564, "step": 2797 }, { "epoch": 1.06, "learning_rate": 1.0785739252783596e-05, "loss": 0.5218, "step": 2798 }, { "epoch": 1.06, "learning_rate": 1.0779177424225182e-05, "loss": 0.5826, "step": 2799 }, { "epoch": 1.06, "learning_rate": 1.0772615258105024e-05, "loss": 0.5218, "step": 2800 }, { "epoch": 1.06, "learning_rate": 1.0766052757266043e-05, "loss": 0.5154, "step": 2801 }, { "epoch": 1.06, "learning_rate": 1.0759489924551293e-05, "loss": 0.5513, "step": 2802 }, { "epoch": 1.06, "learning_rate": 1.0752926762803979e-05, "loss": 0.5806, "step": 2803 }, { "epoch": 1.06, "learning_rate": 1.0746363274867455e-05, "loss": 0.5322, "step": 2804 }, { "epoch": 1.06, "learning_rate": 1.0739799463585207e-05, "loss": 0.5585, "step": 2805 }, { "epoch": 1.06, "learning_rate": 1.0733235331800862e-05, "loss": 0.5344, "step": 2806 }, { "epoch": 1.06, "learning_rate": 1.072667088235819e-05, "loss": 0.5848, "step": 2807 }, { "epoch": 1.06, "learning_rate": 1.07201061181011e-05, "loss": 0.5876, "step": 2808 }, { "epoch": 1.06, "learning_rate": 1.0713541041873623e-05, "loss": 0.5112, "step": 2809 }, { "epoch": 1.06, "learning_rate": 1.0706975656519946e-05, "loss": 0.5024, "step": 2810 }, { "epoch": 1.06, "learning_rate": 1.070040996488438e-05, "loss": 0.5433, "step": 2811 }, { "epoch": 1.06, "learning_rate": 1.0693843969811366e-05, "loss": 0.6235, "step": 2812 }, { "epoch": 1.06, "learning_rate": 1.0687277674145475e-05, "loss": 0.6241, "step": 2813 }, { "epoch": 1.06, "learning_rate": 1.0680711080731418e-05, "loss": 0.5702, "step": 2814 }, { "epoch": 1.06, "learning_rate": 1.0674144192414029e-05, "loss": 0.5927, "step": 2815 }, { "epoch": 1.06, "learning_rate": 1.066757701203827e-05, "loss": 0.5785, "step": 2816 }, { "epoch": 1.06, "learning_rate": 1.0661009542449228e-05, "loss": 0.5253, "step": 2817 }, { "epoch": 1.06, "learning_rate": 1.0654441786492116e-05, "loss": 0.5345, "step": 2818 }, { "epoch": 1.06, "learning_rate": 1.0647873747012274e-05, "loss": 0.5866, "step": 2819 }, { "epoch": 1.06, "learning_rate": 1.0641305426855164e-05, "loss": 0.5871, "step": 2820 }, { "epoch": 1.06, "learning_rate": 1.0634736828866366e-05, "loss": 0.5463, "step": 2821 }, { "epoch": 1.06, "learning_rate": 1.0628167955891584e-05, "loss": 0.5191, "step": 2822 }, { "epoch": 1.06, "learning_rate": 1.0621598810776642e-05, "loss": 0.5758, "step": 2823 }, { "epoch": 1.06, "learning_rate": 1.0615029396367474e-05, "loss": 0.5869, "step": 2824 }, { "epoch": 1.07, "learning_rate": 1.060845971551014e-05, "loss": 0.5675, "step": 2825 }, { "epoch": 1.07, "learning_rate": 1.0601889771050815e-05, "loss": 0.553, "step": 2826 }, { "epoch": 1.07, "learning_rate": 1.059531956583578e-05, "loss": 0.5812, "step": 2827 }, { "epoch": 1.07, "learning_rate": 1.0588749102711435e-05, "loss": 0.5513, "step": 2828 }, { "epoch": 1.07, "learning_rate": 1.0582178384524296e-05, "loss": 0.5391, "step": 2829 }, { "epoch": 1.07, "learning_rate": 1.0575607414120981e-05, "loss": 0.5255, "step": 2830 }, { "epoch": 1.07, "learning_rate": 1.0569036194348215e-05, "loss": 0.5309, "step": 2831 }, { "epoch": 1.07, "learning_rate": 1.0562464728052844e-05, "loss": 0.5498, "step": 2832 }, { "epoch": 1.07, "learning_rate": 1.0555893018081813e-05, "loss": 0.5427, "step": 2833 }, { "epoch": 1.07, "learning_rate": 1.0549321067282171e-05, "loss": 0.5391, "step": 2834 }, { "epoch": 1.07, "learning_rate": 1.0542748878501073e-05, "loss": 0.5445, "step": 2835 }, { "epoch": 1.07, "learning_rate": 1.0536176454585776e-05, "loss": 0.5383, "step": 2836 }, { "epoch": 1.07, "learning_rate": 1.0529603798383647e-05, "loss": 0.5669, "step": 2837 }, { "epoch": 1.07, "learning_rate": 1.0523030912742137e-05, "loss": 0.5653, "step": 2838 }, { "epoch": 1.07, "learning_rate": 1.0516457800508815e-05, "loss": 0.577, "step": 2839 }, { "epoch": 1.07, "learning_rate": 1.0509884464531334e-05, "loss": 0.5492, "step": 2840 }, { "epoch": 1.07, "learning_rate": 1.0503310907657456e-05, "loss": 0.5728, "step": 2841 }, { "epoch": 1.07, "learning_rate": 1.0496737132735025e-05, "loss": 0.537, "step": 2842 }, { "epoch": 1.07, "learning_rate": 1.0490163142611988e-05, "loss": 0.5901, "step": 2843 }, { "epoch": 1.07, "learning_rate": 1.0483588940136386e-05, "loss": 0.5859, "step": 2844 }, { "epoch": 1.07, "learning_rate": 1.047701452815635e-05, "loss": 0.5785, "step": 2845 }, { "epoch": 1.07, "learning_rate": 1.04704399095201e-05, "loss": 0.5465, "step": 2846 }, { "epoch": 1.07, "learning_rate": 1.0463865087075947e-05, "loss": 0.6067, "step": 2847 }, { "epoch": 1.07, "learning_rate": 1.0457290063672293e-05, "loss": 0.5936, "step": 2848 }, { "epoch": 1.07, "learning_rate": 1.0450714842157624e-05, "loss": 0.569, "step": 2849 }, { "epoch": 1.07, "learning_rate": 1.0444139425380506e-05, "loss": 0.5945, "step": 2850 }, { "epoch": 1.08, "learning_rate": 1.0437563816189606e-05, "loss": 0.4921, "step": 2851 }, { "epoch": 1.08, "learning_rate": 1.0430988017433659e-05, "loss": 0.5659, "step": 2852 }, { "epoch": 1.08, "learning_rate": 1.0424412031961485e-05, "loss": 0.559, "step": 2853 }, { "epoch": 1.08, "learning_rate": 1.041783586262199e-05, "loss": 0.547, "step": 2854 }, { "epoch": 1.08, "learning_rate": 1.041125951226416e-05, "loss": 0.5436, "step": 2855 }, { "epoch": 1.08, "learning_rate": 1.0404682983737051e-05, "loss": 0.5595, "step": 2856 }, { "epoch": 1.08, "learning_rate": 1.03981062798898e-05, "loss": 0.5316, "step": 2857 }, { "epoch": 1.08, "learning_rate": 1.0391529403571629e-05, "loss": 0.5347, "step": 2858 }, { "epoch": 1.08, "learning_rate": 1.0384952357631829e-05, "loss": 0.5426, "step": 2859 }, { "epoch": 1.08, "learning_rate": 1.0378375144919748e-05, "loss": 0.5659, "step": 2860 }, { "epoch": 1.08, "learning_rate": 1.0371797768284834e-05, "loss": 0.557, "step": 2861 }, { "epoch": 1.08, "learning_rate": 1.0365220230576592e-05, "loss": 0.5713, "step": 2862 }, { "epoch": 1.08, "learning_rate": 1.035864253464459e-05, "loss": 0.564, "step": 2863 }, { "epoch": 1.08, "learning_rate": 1.0352064683338479e-05, "loss": 0.5504, "step": 2864 }, { "epoch": 1.08, "learning_rate": 1.034548667950797e-05, "loss": 0.6266, "step": 2865 }, { "epoch": 1.08, "learning_rate": 1.0338908526002838e-05, "loss": 0.5955, "step": 2866 }, { "epoch": 1.08, "learning_rate": 1.0332330225672928e-05, "loss": 0.5315, "step": 2867 }, { "epoch": 1.08, "learning_rate": 1.0325751781368146e-05, "loss": 0.5234, "step": 2868 }, { "epoch": 1.08, "learning_rate": 1.0319173195938457e-05, "loss": 0.5266, "step": 2869 }, { "epoch": 1.08, "learning_rate": 1.0312594472233896e-05, "loss": 0.5315, "step": 2870 }, { "epoch": 1.08, "learning_rate": 1.030601561310455e-05, "loss": 0.5357, "step": 2871 }, { "epoch": 1.08, "learning_rate": 1.0299436621400563e-05, "loss": 0.5457, "step": 2872 }, { "epoch": 1.08, "learning_rate": 1.0292857499972151e-05, "loss": 0.5948, "step": 2873 }, { "epoch": 1.08, "learning_rate": 1.0286278251669565e-05, "loss": 0.5585, "step": 2874 }, { "epoch": 1.08, "learning_rate": 1.0279698879343128e-05, "loss": 0.5777, "step": 2875 }, { "epoch": 1.08, "learning_rate": 1.0273119385843211e-05, "loss": 0.5572, "step": 2876 }, { "epoch": 1.08, "learning_rate": 1.0266539774020234e-05, "loss": 0.5661, "step": 2877 }, { "epoch": 1.09, "learning_rate": 1.0259960046724672e-05, "loss": 0.5796, "step": 2878 }, { "epoch": 1.09, "learning_rate": 1.0253380206807049e-05, "loss": 0.5345, "step": 2879 }, { "epoch": 1.09, "learning_rate": 1.0246800257117944e-05, "loss": 0.5381, "step": 2880 }, { "epoch": 1.09, "learning_rate": 1.024022020050797e-05, "loss": 0.6311, "step": 2881 }, { "epoch": 1.09, "learning_rate": 1.0233640039827793e-05, "loss": 0.6096, "step": 2882 }, { "epoch": 1.09, "learning_rate": 1.022705977792813e-05, "loss": 0.5825, "step": 2883 }, { "epoch": 1.09, "learning_rate": 1.022047941765974e-05, "loss": 0.5382, "step": 2884 }, { "epoch": 1.09, "learning_rate": 1.0213898961873412e-05, "loss": 0.596, "step": 2885 }, { "epoch": 1.09, "learning_rate": 1.0207318413419996e-05, "loss": 0.5497, "step": 2886 }, { "epoch": 1.09, "learning_rate": 1.0200737775150365e-05, "loss": 0.5415, "step": 2887 }, { "epoch": 1.09, "learning_rate": 1.019415704991544e-05, "loss": 0.5729, "step": 2888 }, { "epoch": 1.09, "learning_rate": 1.018757624056618e-05, "loss": 0.5392, "step": 2889 }, { "epoch": 1.09, "learning_rate": 1.0180995349953576e-05, "loss": 0.5388, "step": 2890 }, { "epoch": 1.09, "learning_rate": 1.0174414380928655e-05, "loss": 0.5872, "step": 2891 }, { "epoch": 1.09, "learning_rate": 1.016783333634248e-05, "loss": 0.5631, "step": 2892 }, { "epoch": 1.09, "learning_rate": 1.016125221904615e-05, "loss": 0.5403, "step": 2893 }, { "epoch": 1.09, "learning_rate": 1.0154671031890786e-05, "loss": 0.5312, "step": 2894 }, { "epoch": 1.09, "learning_rate": 1.014808977772755e-05, "loss": 0.5776, "step": 2895 }, { "epoch": 1.09, "learning_rate": 1.0141508459407622e-05, "loss": 0.5603, "step": 2896 }, { "epoch": 1.09, "learning_rate": 1.0134927079782222e-05, "loss": 0.4989, "step": 2897 }, { "epoch": 1.09, "learning_rate": 1.0128345641702583e-05, "loss": 0.5194, "step": 2898 }, { "epoch": 1.09, "learning_rate": 1.0121764148019977e-05, "loss": 0.5553, "step": 2899 }, { "epoch": 1.09, "learning_rate": 1.0115182601585691e-05, "loss": 0.5486, "step": 2900 }, { "epoch": 1.09, "learning_rate": 1.0108601005251035e-05, "loss": 0.5389, "step": 2901 }, { "epoch": 1.09, "learning_rate": 1.010201936186735e-05, "loss": 0.6681, "step": 2902 }, { "epoch": 1.09, "learning_rate": 1.0095437674285981e-05, "loss": 0.5241, "step": 2903 }, { "epoch": 1.1, "learning_rate": 1.0088855945358308e-05, "loss": 0.5093, "step": 2904 }, { "epoch": 1.1, "learning_rate": 1.0082274177935724e-05, "loss": 0.5451, "step": 2905 }, { "epoch": 1.1, "learning_rate": 1.0075692374869632e-05, "loss": 0.5981, "step": 2906 }, { "epoch": 1.1, "learning_rate": 1.0069110539011458e-05, "loss": 0.5122, "step": 2907 }, { "epoch": 1.1, "learning_rate": 1.0062528673212639e-05, "loss": 0.5054, "step": 2908 }, { "epoch": 1.1, "learning_rate": 1.0055946780324631e-05, "loss": 0.5285, "step": 2909 }, { "epoch": 1.1, "learning_rate": 1.0049364863198885e-05, "loss": 0.538, "step": 2910 }, { "epoch": 1.1, "learning_rate": 1.0042782924686885e-05, "loss": 0.5953, "step": 2911 }, { "epoch": 1.1, "learning_rate": 1.0036200967640109e-05, "loss": 0.5281, "step": 2912 }, { "epoch": 1.1, "learning_rate": 1.0029618994910047e-05, "loss": 0.5302, "step": 2913 }, { "epoch": 1.1, "learning_rate": 1.0023037009348198e-05, "loss": 0.5507, "step": 2914 }, { "epoch": 1.1, "learning_rate": 1.0016455013806063e-05, "loss": 0.5221, "step": 2915 }, { "epoch": 1.1, "learning_rate": 1.0009873011135151e-05, "loss": 0.5583, "step": 2916 }, { "epoch": 1.1, "learning_rate": 1.000329100418697e-05, "loss": 0.5637, "step": 2917 }, { "epoch": 1.1, "learning_rate": 9.996708995813033e-06, "loss": 0.5423, "step": 2918 }, { "epoch": 1.1, "learning_rate": 9.990126988864852e-06, "loss": 0.5828, "step": 2919 }, { "epoch": 1.1, "learning_rate": 9.983544986193942e-06, "loss": 0.5058, "step": 2920 }, { "epoch": 1.1, "learning_rate": 9.976962990651802e-06, "loss": 0.5817, "step": 2921 }, { "epoch": 1.1, "learning_rate": 9.970381005089953e-06, "loss": 0.5965, "step": 2922 }, { "epoch": 1.1, "learning_rate": 9.963799032359894e-06, "loss": 0.5429, "step": 2923 }, { "epoch": 1.1, "learning_rate": 9.957217075313118e-06, "loss": 0.5946, "step": 2924 }, { "epoch": 1.1, "learning_rate": 9.950635136801118e-06, "loss": 0.5308, "step": 2925 }, { "epoch": 1.1, "learning_rate": 9.944053219675376e-06, "loss": 0.5699, "step": 2926 }, { "epoch": 1.1, "learning_rate": 9.937471326787363e-06, "loss": 0.5527, "step": 2927 }, { "epoch": 1.1, "learning_rate": 9.930889460988544e-06, "loss": 0.5543, "step": 2928 }, { "epoch": 1.1, "learning_rate": 9.924307625130371e-06, "loss": 0.5768, "step": 2929 }, { "epoch": 1.1, "learning_rate": 9.917725822064278e-06, "loss": 0.5446, "step": 2930 }, { "epoch": 1.11, "learning_rate": 9.911144054641695e-06, "loss": 0.5132, "step": 2931 }, { "epoch": 1.11, "learning_rate": 9.904562325714022e-06, "loss": 0.5499, "step": 2932 }, { "epoch": 1.11, "learning_rate": 9.897980638132657e-06, "loss": 0.5399, "step": 2933 }, { "epoch": 1.11, "learning_rate": 9.891398994748967e-06, "loss": 0.5534, "step": 2934 }, { "epoch": 1.11, "learning_rate": 9.884817398414312e-06, "loss": 0.6019, "step": 2935 }, { "epoch": 1.11, "learning_rate": 9.878235851980027e-06, "loss": 0.6063, "step": 2936 }, { "epoch": 1.11, "learning_rate": 9.871654358297418e-06, "loss": 0.5152, "step": 2937 }, { "epoch": 1.11, "learning_rate": 9.865072920217785e-06, "loss": 0.5474, "step": 2938 }, { "epoch": 1.11, "learning_rate": 9.858491540592383e-06, "loss": 0.5142, "step": 2939 }, { "epoch": 1.11, "learning_rate": 9.851910222272452e-06, "loss": 0.5328, "step": 2940 }, { "epoch": 1.11, "learning_rate": 9.845328968109216e-06, "loss": 0.5622, "step": 2941 }, { "epoch": 1.11, "learning_rate": 9.838747780953854e-06, "loss": 0.5733, "step": 2942 }, { "epoch": 1.11, "learning_rate": 9.832166663657522e-06, "loss": 0.5586, "step": 2943 }, { "epoch": 1.11, "learning_rate": 9.825585619071348e-06, "loss": 0.5418, "step": 2944 }, { "epoch": 1.11, "learning_rate": 9.81900465004643e-06, "loss": 0.5309, "step": 2945 }, { "epoch": 1.11, "learning_rate": 9.81242375943382e-06, "loss": 0.562, "step": 2946 }, { "epoch": 1.11, "learning_rate": 9.805842950084561e-06, "loss": 0.5339, "step": 2947 }, { "epoch": 1.11, "learning_rate": 9.799262224849638e-06, "loss": 0.5249, "step": 2948 }, { "epoch": 1.11, "learning_rate": 9.792681586580008e-06, "loss": 0.5417, "step": 2949 }, { "epoch": 1.11, "learning_rate": 9.786101038126592e-06, "loss": 0.5271, "step": 2950 }, { "epoch": 1.11, "learning_rate": 9.779520582340265e-06, "loss": 0.5307, "step": 2951 }, { "epoch": 1.11, "learning_rate": 9.772940222071873e-06, "loss": 0.508, "step": 2952 }, { "epoch": 1.11, "learning_rate": 9.766359960172208e-06, "loss": 0.5068, "step": 2953 }, { "epoch": 1.11, "learning_rate": 9.759779799492036e-06, "loss": 0.5903, "step": 2954 }, { "epoch": 1.11, "learning_rate": 9.75319974288206e-06, "loss": 0.5754, "step": 2955 }, { "epoch": 1.11, "learning_rate": 9.746619793192953e-06, "loss": 0.5358, "step": 2956 }, { "epoch": 1.12, "learning_rate": 9.740039953275333e-06, "loss": 0.5527, "step": 2957 }, { "epoch": 1.12, "learning_rate": 9.73346022597977e-06, "loss": 0.6081, "step": 2958 }, { "epoch": 1.12, "learning_rate": 9.72688061415679e-06, "loss": 0.5404, "step": 2959 }, { "epoch": 1.12, "learning_rate": 9.720301120656874e-06, "loss": 0.5337, "step": 2960 }, { "epoch": 1.12, "learning_rate": 9.713721748330436e-06, "loss": 0.5212, "step": 2961 }, { "epoch": 1.12, "learning_rate": 9.707142500027852e-06, "loss": 0.5338, "step": 2962 }, { "epoch": 1.12, "learning_rate": 9.70056337859944e-06, "loss": 0.5501, "step": 2963 }, { "epoch": 1.12, "learning_rate": 9.693984386895457e-06, "loss": 0.5247, "step": 2964 }, { "epoch": 1.12, "learning_rate": 9.687405527766106e-06, "loss": 0.5864, "step": 2965 }, { "epoch": 1.12, "learning_rate": 9.680826804061545e-06, "loss": 0.5547, "step": 2966 }, { "epoch": 1.12, "learning_rate": 9.674248218631858e-06, "loss": 0.5964, "step": 2967 }, { "epoch": 1.12, "learning_rate": 9.667669774327075e-06, "loss": 0.5075, "step": 2968 }, { "epoch": 1.12, "learning_rate": 9.661091473997166e-06, "loss": 0.5132, "step": 2969 }, { "epoch": 1.12, "learning_rate": 9.654513320492033e-06, "loss": 0.5795, "step": 2970 }, { "epoch": 1.12, "learning_rate": 9.647935316661526e-06, "loss": 0.5254, "step": 2971 }, { "epoch": 1.12, "learning_rate": 9.641357465355412e-06, "loss": 0.5221, "step": 2972 }, { "epoch": 1.12, "learning_rate": 9.634779769423412e-06, "loss": 0.5932, "step": 2973 }, { "epoch": 1.12, "learning_rate": 9.628202231715169e-06, "loss": 0.566, "step": 2974 }, { "epoch": 1.12, "learning_rate": 9.621624855080255e-06, "loss": 0.5387, "step": 2975 }, { "epoch": 1.12, "learning_rate": 9.615047642368178e-06, "loss": 0.5877, "step": 2976 }, { "epoch": 1.12, "learning_rate": 9.608470596428373e-06, "loss": 0.5694, "step": 2977 }, { "epoch": 1.12, "learning_rate": 9.6018937201102e-06, "loss": 0.5484, "step": 2978 }, { "epoch": 1.12, "learning_rate": 9.595317016262952e-06, "loss": 0.5739, "step": 2979 }, { "epoch": 1.12, "learning_rate": 9.588740487735843e-06, "loss": 0.6298, "step": 2980 }, { "epoch": 1.12, "learning_rate": 9.582164137378013e-06, "loss": 0.6063, "step": 2981 }, { "epoch": 1.12, "learning_rate": 9.57558796803852e-06, "loss": 0.5453, "step": 2982 }, { "epoch": 1.12, "learning_rate": 9.569011982566346e-06, "loss": 0.5234, "step": 2983 }, { "epoch": 1.13, "learning_rate": 9.562436183810397e-06, "loss": 0.5226, "step": 2984 }, { "epoch": 1.13, "learning_rate": 9.555860574619494e-06, "loss": 0.5776, "step": 2985 }, { "epoch": 1.13, "learning_rate": 9.54928515784238e-06, "loss": 0.5067, "step": 2986 }, { "epoch": 1.13, "learning_rate": 9.542709936327708e-06, "loss": 0.5617, "step": 2987 }, { "epoch": 1.13, "learning_rate": 9.536134912924057e-06, "loss": 0.5158, "step": 2988 }, { "epoch": 1.13, "learning_rate": 9.529560090479905e-06, "loss": 0.5049, "step": 2989 }, { "epoch": 1.13, "learning_rate": 9.522985471843651e-06, "loss": 0.5384, "step": 2990 }, { "epoch": 1.13, "learning_rate": 9.516411059863616e-06, "loss": 0.5238, "step": 2991 }, { "epoch": 1.13, "learning_rate": 9.509836857388015e-06, "loss": 0.5271, "step": 2992 }, { "epoch": 1.13, "learning_rate": 9.50326286726498e-06, "loss": 0.562, "step": 2993 }, { "epoch": 1.13, "learning_rate": 9.496689092342548e-06, "loss": 0.5333, "step": 2994 }, { "epoch": 1.13, "learning_rate": 9.490115535468668e-06, "loss": 0.5513, "step": 2995 }, { "epoch": 1.13, "learning_rate": 9.48354219949119e-06, "loss": 0.5583, "step": 2996 }, { "epoch": 1.13, "learning_rate": 9.476969087257864e-06, "loss": 0.5299, "step": 2997 }, { "epoch": 1.13, "learning_rate": 9.470396201616355e-06, "loss": 0.5576, "step": 2998 }, { "epoch": 1.13, "learning_rate": 9.463823545414225e-06, "loss": 0.5568, "step": 2999 }, { "epoch": 1.13, "learning_rate": 9.457251121498932e-06, "loss": 0.545, "step": 3000 }, { "epoch": 1.13, "learning_rate": 9.450678932717834e-06, "loss": 0.5374, "step": 3001 }, { "epoch": 1.13, "learning_rate": 9.44410698191819e-06, "loss": 0.5879, "step": 3002 }, { "epoch": 1.13, "learning_rate": 9.437535271947156e-06, "loss": 0.5551, "step": 3003 }, { "epoch": 1.13, "learning_rate": 9.430963805651787e-06, "loss": 0.552, "step": 3004 }, { "epoch": 1.13, "learning_rate": 9.424392585879022e-06, "loss": 0.6175, "step": 3005 }, { "epoch": 1.13, "learning_rate": 9.417821615475707e-06, "loss": 0.57, "step": 3006 }, { "epoch": 1.13, "learning_rate": 9.411250897288568e-06, "loss": 0.5273, "step": 3007 }, { "epoch": 1.13, "learning_rate": 9.404680434164223e-06, "loss": 0.5697, "step": 3008 }, { "epoch": 1.13, "learning_rate": 9.398110228949187e-06, "loss": 0.6007, "step": 3009 }, { "epoch": 1.13, "learning_rate": 9.391540284489862e-06, "loss": 0.5522, "step": 3010 }, { "epoch": 1.14, "learning_rate": 9.38497060363253e-06, "loss": 0.5852, "step": 3011 }, { "epoch": 1.14, "learning_rate": 9.378401189223363e-06, "loss": 0.4979, "step": 3012 }, { "epoch": 1.14, "learning_rate": 9.371832044108419e-06, "loss": 0.5721, "step": 3013 }, { "epoch": 1.14, "learning_rate": 9.36526317113364e-06, "loss": 0.5618, "step": 3014 }, { "epoch": 1.14, "learning_rate": 9.358694573144841e-06, "loss": 0.5267, "step": 3015 }, { "epoch": 1.14, "learning_rate": 9.352126252987727e-06, "loss": 0.5579, "step": 3016 }, { "epoch": 1.14, "learning_rate": 9.345558213507888e-06, "loss": 0.5583, "step": 3017 }, { "epoch": 1.14, "learning_rate": 9.338990457550777e-06, "loss": 0.5821, "step": 3018 }, { "epoch": 1.14, "learning_rate": 9.332422987961734e-06, "loss": 0.6165, "step": 3019 }, { "epoch": 1.14, "learning_rate": 9.325855807585973e-06, "loss": 0.5287, "step": 3020 }, { "epoch": 1.14, "learning_rate": 9.319288919268586e-06, "loss": 0.5208, "step": 3021 }, { "epoch": 1.14, "learning_rate": 9.312722325854525e-06, "loss": 0.593, "step": 3022 }, { "epoch": 1.14, "learning_rate": 9.306156030188636e-06, "loss": 0.5188, "step": 3023 }, { "epoch": 1.14, "learning_rate": 9.299590035115624e-06, "loss": 0.5132, "step": 3024 }, { "epoch": 1.14, "learning_rate": 9.293024343480056e-06, "loss": 0.5119, "step": 3025 }, { "epoch": 1.14, "learning_rate": 9.28645895812638e-06, "loss": 0.5475, "step": 3026 }, { "epoch": 1.14, "learning_rate": 9.279893881898905e-06, "loss": 0.576, "step": 3027 }, { "epoch": 1.14, "learning_rate": 9.27332911764181e-06, "loss": 0.5451, "step": 3028 }, { "epoch": 1.14, "learning_rate": 9.26676466819914e-06, "loss": 0.5663, "step": 3029 }, { "epoch": 1.14, "learning_rate": 9.260200536414795e-06, "loss": 0.5255, "step": 3030 }, { "epoch": 1.14, "learning_rate": 9.253636725132549e-06, "loss": 0.5351, "step": 3031 }, { "epoch": 1.14, "learning_rate": 9.247073237196023e-06, "loss": 0.4638, "step": 3032 }, { "epoch": 1.14, "learning_rate": 9.240510075448712e-06, "loss": 0.5621, "step": 3033 }, { "epoch": 1.14, "learning_rate": 9.23394724273396e-06, "loss": 0.5652, "step": 3034 }, { "epoch": 1.14, "learning_rate": 9.227384741894976e-06, "loss": 0.5612, "step": 3035 }, { "epoch": 1.14, "learning_rate": 9.22082257577482e-06, "loss": 0.5726, "step": 3036 }, { "epoch": 1.15, "learning_rate": 9.214260747216408e-06, "loss": 0.5197, "step": 3037 }, { "epoch": 1.15, "learning_rate": 9.207699259062508e-06, "loss": 0.5632, "step": 3038 }, { "epoch": 1.15, "learning_rate": 9.201138114155748e-06, "loss": 0.5703, "step": 3039 }, { "epoch": 1.15, "learning_rate": 9.194577315338598e-06, "loss": 0.5905, "step": 3040 }, { "epoch": 1.15, "learning_rate": 9.188016865453376e-06, "loss": 0.5428, "step": 3041 }, { "epoch": 1.15, "learning_rate": 9.18145676734227e-06, "loss": 0.548, "step": 3042 }, { "epoch": 1.15, "learning_rate": 9.174897023847289e-06, "loss": 0.5694, "step": 3043 }, { "epoch": 1.15, "learning_rate": 9.168337637810301e-06, "loss": 0.5926, "step": 3044 }, { "epoch": 1.15, "learning_rate": 9.16177861207302e-06, "loss": 0.5921, "step": 3045 }, { "epoch": 1.15, "learning_rate": 9.155219949477005e-06, "loss": 0.564, "step": 3046 }, { "epoch": 1.15, "learning_rate": 9.148661652863644e-06, "loss": 0.5573, "step": 3047 }, { "epoch": 1.15, "learning_rate": 9.142103725074187e-06, "loss": 0.5362, "step": 3048 }, { "epoch": 1.15, "learning_rate": 9.135546168949713e-06, "loss": 0.5429, "step": 3049 }, { "epoch": 1.15, "learning_rate": 9.128988987331138e-06, "loss": 0.5708, "step": 3050 }, { "epoch": 1.15, "learning_rate": 9.12243218305922e-06, "loss": 0.5087, "step": 3051 }, { "epoch": 1.15, "learning_rate": 9.115875758974555e-06, "loss": 0.5105, "step": 3052 }, { "epoch": 1.15, "learning_rate": 9.109319717917565e-06, "loss": 0.5627, "step": 3053 }, { "epoch": 1.15, "learning_rate": 9.10276406272852e-06, "loss": 0.6005, "step": 3054 }, { "epoch": 1.15, "learning_rate": 9.096208796247513e-06, "loss": 0.531, "step": 3055 }, { "epoch": 1.15, "learning_rate": 9.089653921314469e-06, "loss": 0.555, "step": 3056 }, { "epoch": 1.15, "learning_rate": 9.08309944076915e-06, "loss": 0.5375, "step": 3057 }, { "epoch": 1.15, "learning_rate": 9.076545357451141e-06, "loss": 0.5217, "step": 3058 }, { "epoch": 1.15, "learning_rate": 9.069991674199854e-06, "loss": 0.5731, "step": 3059 }, { "epoch": 1.15, "learning_rate": 9.063438393854531e-06, "loss": 0.5584, "step": 3060 }, { "epoch": 1.15, "learning_rate": 9.056885519254241e-06, "loss": 0.5671, "step": 3061 }, { "epoch": 1.15, "learning_rate": 9.050333053237875e-06, "loss": 0.5461, "step": 3062 }, { "epoch": 1.15, "learning_rate": 9.043780998644147e-06, "loss": 0.5332, "step": 3063 }, { "epoch": 1.16, "learning_rate": 9.037229358311593e-06, "loss": 0.5317, "step": 3064 }, { "epoch": 1.16, "learning_rate": 9.030678135078566e-06, "loss": 0.5725, "step": 3065 }, { "epoch": 1.16, "learning_rate": 9.02412733178324e-06, "loss": 0.6128, "step": 3066 }, { "epoch": 1.16, "learning_rate": 9.017576951263618e-06, "loss": 0.5495, "step": 3067 }, { "epoch": 1.16, "learning_rate": 9.011026996357504e-06, "loss": 0.5254, "step": 3068 }, { "epoch": 1.16, "learning_rate": 9.004477469902524e-06, "loss": 0.5495, "step": 3069 }, { "epoch": 1.16, "learning_rate": 8.99792837473612e-06, "loss": 0.6094, "step": 3070 }, { "epoch": 1.16, "learning_rate": 8.991379713695544e-06, "loss": 0.6183, "step": 3071 }, { "epoch": 1.16, "learning_rate": 8.984831489617862e-06, "loss": 0.5578, "step": 3072 }, { "epoch": 1.16, "learning_rate": 8.978283705339951e-06, "loss": 0.607, "step": 3073 }, { "epoch": 1.16, "learning_rate": 8.971736363698501e-06, "loss": 0.5698, "step": 3074 }, { "epoch": 1.16, "learning_rate": 8.96518946753e-06, "loss": 0.5239, "step": 3075 }, { "epoch": 1.16, "learning_rate": 8.958643019670748e-06, "loss": 0.5745, "step": 3076 }, { "epoch": 1.16, "learning_rate": 8.952097022956857e-06, "loss": 0.5966, "step": 3077 }, { "epoch": 1.16, "learning_rate": 8.945551480224234e-06, "loss": 0.6079, "step": 3078 }, { "epoch": 1.16, "learning_rate": 8.939006394308594e-06, "loss": 0.5553, "step": 3079 }, { "epoch": 1.16, "learning_rate": 8.932461768045456e-06, "loss": 0.5431, "step": 3080 }, { "epoch": 1.16, "learning_rate": 8.925917604270139e-06, "loss": 0.5156, "step": 3081 }, { "epoch": 1.16, "learning_rate": 8.919373905817758e-06, "loss": 0.5251, "step": 3082 }, { "epoch": 1.16, "learning_rate": 8.912830675523225e-06, "loss": 0.5849, "step": 3083 }, { "epoch": 1.16, "learning_rate": 8.906287916221259e-06, "loss": 0.5396, "step": 3084 }, { "epoch": 1.16, "learning_rate": 8.899745630746362e-06, "loss": 0.5322, "step": 3085 }, { "epoch": 1.16, "learning_rate": 8.893203821932845e-06, "loss": 0.5132, "step": 3086 }, { "epoch": 1.16, "learning_rate": 8.886662492614801e-06, "loss": 0.5833, "step": 3087 }, { "epoch": 1.16, "learning_rate": 8.880121645626121e-06, "loss": 0.5292, "step": 3088 }, { "epoch": 1.16, "learning_rate": 8.873581283800485e-06, "loss": 0.5727, "step": 3089 }, { "epoch": 1.17, "learning_rate": 8.86704140997136e-06, "loss": 0.569, "step": 3090 }, { "epoch": 1.17, "learning_rate": 8.860502026972004e-06, "loss": 0.5364, "step": 3091 }, { "epoch": 1.17, "learning_rate": 8.85396313763547e-06, "loss": 0.5583, "step": 3092 }, { "epoch": 1.17, "learning_rate": 8.847424744794587e-06, "loss": 0.5513, "step": 3093 }, { "epoch": 1.17, "learning_rate": 8.840886851281968e-06, "loss": 0.5557, "step": 3094 }, { "epoch": 1.17, "learning_rate": 8.834349459930019e-06, "loss": 0.5845, "step": 3095 }, { "epoch": 1.17, "learning_rate": 8.827812573570922e-06, "loss": 0.5048, "step": 3096 }, { "epoch": 1.17, "learning_rate": 8.821276195036637e-06, "loss": 0.5419, "step": 3097 }, { "epoch": 1.17, "learning_rate": 8.814740327158913e-06, "loss": 0.5909, "step": 3098 }, { "epoch": 1.17, "learning_rate": 8.808204972769275e-06, "loss": 0.59, "step": 3099 }, { "epoch": 1.17, "learning_rate": 8.801670134699022e-06, "loss": 0.584, "step": 3100 }, { "epoch": 1.17, "learning_rate": 8.795135815779228e-06, "loss": 0.5444, "step": 3101 }, { "epoch": 1.17, "learning_rate": 8.788602018840747e-06, "loss": 0.5362, "step": 3102 }, { "epoch": 1.17, "learning_rate": 8.782068746714205e-06, "loss": 0.548, "step": 3103 }, { "epoch": 1.17, "learning_rate": 8.77553600223e-06, "loss": 0.5644, "step": 3104 }, { "epoch": 1.17, "learning_rate": 8.769003788218305e-06, "loss": 0.5749, "step": 3105 }, { "epoch": 1.17, "learning_rate": 8.762472107509058e-06, "loss": 0.5587, "step": 3106 }, { "epoch": 1.17, "learning_rate": 8.75594096293197e-06, "loss": 0.5601, "step": 3107 }, { "epoch": 1.17, "learning_rate": 8.749410357316514e-06, "loss": 0.5457, "step": 3108 }, { "epoch": 1.17, "learning_rate": 8.742880293491938e-06, "loss": 0.5377, "step": 3109 }, { "epoch": 1.17, "learning_rate": 8.736350774287245e-06, "loss": 0.5479, "step": 3110 }, { "epoch": 1.17, "learning_rate": 8.729821802531213e-06, "loss": 0.5813, "step": 3111 }, { "epoch": 1.17, "learning_rate": 8.723293381052377e-06, "loss": 0.5508, "step": 3112 }, { "epoch": 1.17, "learning_rate": 8.716765512679033e-06, "loss": 0.5582, "step": 3113 }, { "epoch": 1.17, "learning_rate": 8.71023820023924e-06, "loss": 0.5235, "step": 3114 }, { "epoch": 1.17, "learning_rate": 8.703711446560812e-06, "loss": 0.5574, "step": 3115 }, { "epoch": 1.17, "learning_rate": 8.697185254471321e-06, "loss": 0.5379, "step": 3116 }, { "epoch": 1.18, "learning_rate": 8.690659626798109e-06, "loss": 0.5482, "step": 3117 }, { "epoch": 1.18, "learning_rate": 8.684134566368253e-06, "loss": 0.5561, "step": 3118 }, { "epoch": 1.18, "learning_rate": 8.6776100760086e-06, "loss": 0.551, "step": 3119 }, { "epoch": 1.18, "learning_rate": 8.67108615854574e-06, "loss": 0.5362, "step": 3120 }, { "epoch": 1.18, "learning_rate": 8.664562816806022e-06, "loss": 0.5576, "step": 3121 }, { "epoch": 1.18, "learning_rate": 8.658040053615543e-06, "loss": 0.5524, "step": 3122 }, { "epoch": 1.18, "learning_rate": 8.65151787180014e-06, "loss": 0.5501, "step": 3123 }, { "epoch": 1.18, "learning_rate": 8.644996274185419e-06, "loss": 0.5857, "step": 3124 }, { "epoch": 1.18, "learning_rate": 8.638475263596718e-06, "loss": 0.548, "step": 3125 }, { "epoch": 1.18, "learning_rate": 8.631954842859118e-06, "loss": 0.5164, "step": 3126 }, { "epoch": 1.18, "learning_rate": 8.625435014797455e-06, "loss": 0.5915, "step": 3127 }, { "epoch": 1.18, "learning_rate": 8.618915782236301e-06, "loss": 0.5356, "step": 3128 }, { "epoch": 1.18, "learning_rate": 8.612397147999974e-06, "loss": 0.5568, "step": 3129 }, { "epoch": 1.18, "learning_rate": 8.60587911491253e-06, "loss": 0.5897, "step": 3130 }, { "epoch": 1.18, "learning_rate": 8.599361685797765e-06, "loss": 0.5953, "step": 3131 }, { "epoch": 1.18, "learning_rate": 8.592844863479219e-06, "loss": 0.5301, "step": 3132 }, { "epoch": 1.18, "learning_rate": 8.58632865078016e-06, "loss": 0.5293, "step": 3133 }, { "epoch": 1.18, "learning_rate": 8.579813050523594e-06, "loss": 0.5488, "step": 3134 }, { "epoch": 1.18, "learning_rate": 8.573298065532263e-06, "loss": 0.5182, "step": 3135 }, { "epoch": 1.18, "learning_rate": 8.566783698628649e-06, "loss": 0.5296, "step": 3136 }, { "epoch": 1.18, "learning_rate": 8.560269952634957e-06, "loss": 0.5787, "step": 3137 }, { "epoch": 1.18, "learning_rate": 8.553756830373128e-06, "loss": 0.5447, "step": 3138 }, { "epoch": 1.18, "learning_rate": 8.54724433466483e-06, "loss": 0.515, "step": 3139 }, { "epoch": 1.18, "learning_rate": 8.540732468331462e-06, "loss": 0.5591, "step": 3140 }, { "epoch": 1.18, "learning_rate": 8.534221234194147e-06, "loss": 0.5411, "step": 3141 }, { "epoch": 1.18, "learning_rate": 8.527710635073734e-06, "loss": 0.4932, "step": 3142 }, { "epoch": 1.19, "learning_rate": 8.521200673790804e-06, "loss": 0.6261, "step": 3143 }, { "epoch": 1.19, "learning_rate": 8.514691353165655e-06, "loss": 0.5421, "step": 3144 }, { "epoch": 1.19, "learning_rate": 8.508182676018308e-06, "loss": 0.5317, "step": 3145 }, { "epoch": 1.19, "learning_rate": 8.501674645168507e-06, "loss": 0.6044, "step": 3146 }, { "epoch": 1.19, "learning_rate": 8.49516726343572e-06, "loss": 0.522, "step": 3147 }, { "epoch": 1.19, "learning_rate": 8.488660533639116e-06, "loss": 0.5674, "step": 3148 }, { "epoch": 1.19, "learning_rate": 8.482154458597611e-06, "loss": 0.5467, "step": 3149 }, { "epoch": 1.19, "learning_rate": 8.475649041129814e-06, "loss": 0.5282, "step": 3150 }, { "epoch": 1.19, "learning_rate": 8.469144284054054e-06, "loss": 0.5409, "step": 3151 }, { "epoch": 1.19, "learning_rate": 8.462640190188379e-06, "loss": 0.4896, "step": 3152 }, { "epoch": 1.19, "learning_rate": 8.456136762350546e-06, "loss": 0.5798, "step": 3153 }, { "epoch": 1.19, "learning_rate": 8.449634003358022e-06, "loss": 0.54, "step": 3154 }, { "epoch": 1.19, "learning_rate": 8.443131916027991e-06, "loss": 0.5606, "step": 3155 }, { "epoch": 1.19, "learning_rate": 8.436630503177339e-06, "loss": 0.5329, "step": 3156 }, { "epoch": 1.19, "learning_rate": 8.430129767622663e-06, "loss": 0.5707, "step": 3157 }, { "epoch": 1.19, "learning_rate": 8.423629712180265e-06, "loss": 0.5173, "step": 3158 }, { "epoch": 1.19, "learning_rate": 8.417130339666154e-06, "loss": 0.484, "step": 3159 }, { "epoch": 1.19, "learning_rate": 8.41063165289604e-06, "loss": 0.5654, "step": 3160 }, { "epoch": 1.19, "learning_rate": 8.404133654685343e-06, "loss": 0.521, "step": 3161 }, { "epoch": 1.19, "learning_rate": 8.397636347849176e-06, "loss": 0.5701, "step": 3162 }, { "epoch": 1.19, "learning_rate": 8.391139735202359e-06, "loss": 0.5183, "step": 3163 }, { "epoch": 1.19, "learning_rate": 8.384643819559407e-06, "loss": 0.5252, "step": 3164 }, { "epoch": 1.19, "learning_rate": 8.378148603734537e-06, "loss": 0.5065, "step": 3165 }, { "epoch": 1.19, "learning_rate": 8.371654090541656e-06, "loss": 0.5682, "step": 3166 }, { "epoch": 1.19, "learning_rate": 8.365160282794372e-06, "loss": 0.5087, "step": 3167 }, { "epoch": 1.19, "learning_rate": 8.358667183305993e-06, "loss": 0.5512, "step": 3168 }, { "epoch": 1.19, "learning_rate": 8.35217479488951e-06, "loss": 0.5687, "step": 3169 }, { "epoch": 1.2, "learning_rate": 8.345683120357607e-06, "loss": 0.4722, "step": 3170 }, { "epoch": 1.2, "learning_rate": 8.339192162522664e-06, "loss": 0.5558, "step": 3171 }, { "epoch": 1.2, "learning_rate": 8.33270192419675e-06, "loss": 0.5568, "step": 3172 }, { "epoch": 1.2, "learning_rate": 8.32621240819161e-06, "loss": 0.5502, "step": 3173 }, { "epoch": 1.2, "learning_rate": 8.3197236173187e-06, "loss": 0.5385, "step": 3174 }, { "epoch": 1.2, "learning_rate": 8.313235554389143e-06, "loss": 0.5672, "step": 3175 }, { "epoch": 1.2, "learning_rate": 8.306748222213748e-06, "loss": 0.5576, "step": 3176 }, { "epoch": 1.2, "learning_rate": 8.300261623603014e-06, "loss": 0.554, "step": 3177 }, { "epoch": 1.2, "learning_rate": 8.29377576136712e-06, "loss": 0.5504, "step": 3178 }, { "epoch": 1.2, "learning_rate": 8.287290638315922e-06, "loss": 0.5956, "step": 3179 }, { "epoch": 1.2, "learning_rate": 8.280806257258965e-06, "loss": 0.5643, "step": 3180 }, { "epoch": 1.2, "learning_rate": 8.274322621005461e-06, "loss": 0.5404, "step": 3181 }, { "epoch": 1.2, "learning_rate": 8.26783973236431e-06, "loss": 0.6116, "step": 3182 }, { "epoch": 1.2, "learning_rate": 8.261357594144079e-06, "loss": 0.5583, "step": 3183 }, { "epoch": 1.2, "learning_rate": 8.254876209153015e-06, "loss": 0.5724, "step": 3184 }, { "epoch": 1.2, "learning_rate": 8.24839558019904e-06, "loss": 0.5392, "step": 3185 }, { "epoch": 1.2, "learning_rate": 8.24191571008974e-06, "loss": 0.5309, "step": 3186 }, { "epoch": 1.2, "learning_rate": 8.235436601632386e-06, "loss": 0.5667, "step": 3187 }, { "epoch": 1.2, "learning_rate": 8.228958257633906e-06, "loss": 0.5282, "step": 3188 }, { "epoch": 1.2, "learning_rate": 8.222480680900906e-06, "loss": 0.5555, "step": 3189 }, { "epoch": 1.2, "learning_rate": 8.216003874239656e-06, "loss": 0.6029, "step": 3190 }, { "epoch": 1.2, "learning_rate": 8.20952784045609e-06, "loss": 0.5277, "step": 3191 }, { "epoch": 1.2, "learning_rate": 8.203052582355802e-06, "loss": 0.5287, "step": 3192 }, { "epoch": 1.2, "learning_rate": 8.196578102744074e-06, "loss": 0.5641, "step": 3193 }, { "epoch": 1.2, "learning_rate": 8.19010440442582e-06, "loss": 0.5383, "step": 3194 }, { "epoch": 1.2, "learning_rate": 8.183631490205636e-06, "loss": 0.5675, "step": 3195 }, { "epoch": 1.21, "learning_rate": 8.177159362887773e-06, "loss": 0.5535, "step": 3196 }, { "epoch": 1.21, "learning_rate": 8.170688025276134e-06, "loss": 0.5592, "step": 3197 }, { "epoch": 1.21, "learning_rate": 8.164217480174291e-06, "loss": 0.6134, "step": 3198 }, { "epoch": 1.21, "learning_rate": 8.157747730385465e-06, "loss": 0.5808, "step": 3199 }, { "epoch": 1.21, "learning_rate": 8.15127877871254e-06, "loss": 0.564, "step": 3200 }, { "epoch": 1.21, "learning_rate": 8.144810627958043e-06, "loss": 0.5331, "step": 3201 }, { "epoch": 1.21, "learning_rate": 8.13834328092416e-06, "loss": 0.5522, "step": 3202 }, { "epoch": 1.21, "learning_rate": 8.131876740412733e-06, "loss": 0.603, "step": 3203 }, { "epoch": 1.21, "learning_rate": 8.12541100922525e-06, "loss": 0.5501, "step": 3204 }, { "epoch": 1.21, "learning_rate": 8.118946090162844e-06, "loss": 0.5745, "step": 3205 }, { "epoch": 1.21, "learning_rate": 8.112481986026307e-06, "loss": 0.5514, "step": 3206 }, { "epoch": 1.21, "learning_rate": 8.106018699616074e-06, "loss": 0.538, "step": 3207 }, { "epoch": 1.21, "learning_rate": 8.09955623373222e-06, "loss": 0.565, "step": 3208 }, { "epoch": 1.21, "learning_rate": 8.093094591174466e-06, "loss": 0.5544, "step": 3209 }, { "epoch": 1.21, "learning_rate": 8.086633774742182e-06, "loss": 0.5494, "step": 3210 }, { "epoch": 1.21, "learning_rate": 8.080173787234373e-06, "loss": 0.6046, "step": 3211 }, { "epoch": 1.21, "learning_rate": 8.073714631449695e-06, "loss": 0.5804, "step": 3212 }, { "epoch": 1.21, "learning_rate": 8.067256310186433e-06, "loss": 0.5146, "step": 3213 }, { "epoch": 1.21, "learning_rate": 8.060798826242517e-06, "loss": 0.5685, "step": 3214 }, { "epoch": 1.21, "learning_rate": 8.054342182415513e-06, "loss": 0.551, "step": 3215 }, { "epoch": 1.21, "learning_rate": 8.047886381502618e-06, "loss": 0.5504, "step": 3216 }, { "epoch": 1.21, "learning_rate": 8.041431426300665e-06, "loss": 0.545, "step": 3217 }, { "epoch": 1.21, "learning_rate": 8.034977319606135e-06, "loss": 0.5653, "step": 3218 }, { "epoch": 1.21, "learning_rate": 8.028524064215121e-06, "loss": 0.5623, "step": 3219 }, { "epoch": 1.21, "learning_rate": 8.022071662923361e-06, "loss": 0.6194, "step": 3220 }, { "epoch": 1.21, "learning_rate": 8.015620118526214e-06, "loss": 0.5108, "step": 3221 }, { "epoch": 1.21, "learning_rate": 8.009169433818672e-06, "loss": 0.5775, "step": 3222 }, { "epoch": 1.22, "learning_rate": 8.002719611595358e-06, "loss": 0.5302, "step": 3223 }, { "epoch": 1.22, "learning_rate": 7.996270654650516e-06, "loss": 0.5489, "step": 3224 }, { "epoch": 1.22, "learning_rate": 7.989822565778016e-06, "loss": 0.5231, "step": 3225 }, { "epoch": 1.22, "learning_rate": 7.983375347771356e-06, "loss": 0.5336, "step": 3226 }, { "epoch": 1.22, "learning_rate": 7.976929003423649e-06, "loss": 0.5142, "step": 3227 }, { "epoch": 1.22, "learning_rate": 7.970483535527637e-06, "loss": 0.5204, "step": 3228 }, { "epoch": 1.22, "learning_rate": 7.964038946875678e-06, "loss": 0.5435, "step": 3229 }, { "epoch": 1.22, "learning_rate": 7.95759524025975e-06, "loss": 0.5255, "step": 3230 }, { "epoch": 1.22, "learning_rate": 7.951152418471452e-06, "loss": 0.546, "step": 3231 }, { "epoch": 1.22, "learning_rate": 7.944710484301995e-06, "loss": 0.5461, "step": 3232 }, { "epoch": 1.22, "learning_rate": 7.93826944054221e-06, "loss": 0.4924, "step": 3233 }, { "epoch": 1.22, "learning_rate": 7.931829289982536e-06, "loss": 0.5483, "step": 3234 }, { "epoch": 1.22, "learning_rate": 7.925390035413031e-06, "loss": 0.5538, "step": 3235 }, { "epoch": 1.22, "learning_rate": 7.918951679623359e-06, "loss": 0.5333, "step": 3236 }, { "epoch": 1.22, "learning_rate": 7.912514225402803e-06, "loss": 0.5269, "step": 3237 }, { "epoch": 1.22, "learning_rate": 7.906077675540249e-06, "loss": 0.5663, "step": 3238 }, { "epoch": 1.22, "learning_rate": 7.899642032824193e-06, "loss": 0.5453, "step": 3239 }, { "epoch": 1.22, "learning_rate": 7.89320730004274e-06, "loss": 0.542, "step": 3240 }, { "epoch": 1.22, "learning_rate": 7.886773479983593e-06, "loss": 0.4935, "step": 3241 }, { "epoch": 1.22, "learning_rate": 7.880340575434065e-06, "loss": 0.4829, "step": 3242 }, { "epoch": 1.22, "learning_rate": 7.873908589181081e-06, "loss": 0.5099, "step": 3243 }, { "epoch": 1.22, "learning_rate": 7.867477524011154e-06, "loss": 0.5559, "step": 3244 }, { "epoch": 1.22, "learning_rate": 7.861047382710402e-06, "loss": 0.5046, "step": 3245 }, { "epoch": 1.22, "learning_rate": 7.854618168064549e-06, "loss": 0.5117, "step": 3246 }, { "epoch": 1.22, "learning_rate": 7.84818988285891e-06, "loss": 0.4805, "step": 3247 }, { "epoch": 1.22, "learning_rate": 7.841762529878404e-06, "loss": 0.5404, "step": 3248 }, { "epoch": 1.23, "learning_rate": 7.835336111907533e-06, "loss": 0.5452, "step": 3249 }, { "epoch": 1.23, "learning_rate": 7.828910631730412e-06, "loss": 0.5844, "step": 3250 }, { "epoch": 1.23, "learning_rate": 7.822486092130744e-06, "loss": 0.5988, "step": 3251 }, { "epoch": 1.23, "learning_rate": 7.816062495891813e-06, "loss": 0.5427, "step": 3252 }, { "epoch": 1.23, "learning_rate": 7.809639845796507e-06, "loss": 0.4803, "step": 3253 }, { "epoch": 1.23, "learning_rate": 7.803218144627299e-06, "loss": 0.549, "step": 3254 }, { "epoch": 1.23, "learning_rate": 7.796797395166252e-06, "loss": 0.537, "step": 3255 }, { "epoch": 1.23, "learning_rate": 7.790377600195018e-06, "loss": 0.5198, "step": 3256 }, { "epoch": 1.23, "learning_rate": 7.783958762494835e-06, "loss": 0.5362, "step": 3257 }, { "epoch": 1.23, "learning_rate": 7.777540884846525e-06, "loss": 0.5646, "step": 3258 }, { "epoch": 1.23, "learning_rate": 7.771123970030492e-06, "loss": 0.5603, "step": 3259 }, { "epoch": 1.23, "learning_rate": 7.764708020826726e-06, "loss": 0.5319, "step": 3260 }, { "epoch": 1.23, "learning_rate": 7.758293040014798e-06, "loss": 0.5801, "step": 3261 }, { "epoch": 1.23, "learning_rate": 7.751879030373862e-06, "loss": 0.5425, "step": 3262 }, { "epoch": 1.23, "learning_rate": 7.745465994682645e-06, "loss": 0.531, "step": 3263 }, { "epoch": 1.23, "learning_rate": 7.73905393571946e-06, "loss": 0.5653, "step": 3264 }, { "epoch": 1.23, "learning_rate": 7.732642856262189e-06, "loss": 0.5582, "step": 3265 }, { "epoch": 1.23, "learning_rate": 7.7262327590883e-06, "loss": 0.5171, "step": 3266 }, { "epoch": 1.23, "learning_rate": 7.719823646974814e-06, "loss": 0.5053, "step": 3267 }, { "epoch": 1.23, "learning_rate": 7.713415522698357e-06, "loss": 0.5289, "step": 3268 }, { "epoch": 1.23, "learning_rate": 7.707008389035102e-06, "loss": 0.556, "step": 3269 }, { "epoch": 1.23, "learning_rate": 7.700602248760801e-06, "loss": 0.5813, "step": 3270 }, { "epoch": 1.23, "learning_rate": 7.694197104650775e-06, "loss": 0.5219, "step": 3271 }, { "epoch": 1.23, "learning_rate": 7.687792959479916e-06, "loss": 0.5695, "step": 3272 }, { "epoch": 1.23, "learning_rate": 7.681389816022682e-06, "loss": 0.5328, "step": 3273 }, { "epoch": 1.23, "learning_rate": 7.674987677053089e-06, "loss": 0.5873, "step": 3274 }, { "epoch": 1.23, "learning_rate": 7.668586545344734e-06, "loss": 0.6041, "step": 3275 }, { "epoch": 1.24, "learning_rate": 7.662186423670768e-06, "loss": 0.5718, "step": 3276 }, { "epoch": 1.24, "learning_rate": 7.6557873148039e-06, "loss": 0.6029, "step": 3277 }, { "epoch": 1.24, "learning_rate": 7.649389221516409e-06, "loss": 0.5371, "step": 3278 }, { "epoch": 1.24, "learning_rate": 7.642992146580127e-06, "loss": 0.5727, "step": 3279 }, { "epoch": 1.24, "learning_rate": 7.636596092766448e-06, "loss": 0.5461, "step": 3280 }, { "epoch": 1.24, "learning_rate": 7.630201062846329e-06, "loss": 0.5574, "step": 3281 }, { "epoch": 1.24, "learning_rate": 7.623807059590276e-06, "loss": 0.574, "step": 3282 }, { "epoch": 1.24, "learning_rate": 7.617414085768352e-06, "loss": 0.534, "step": 3283 }, { "epoch": 1.24, "learning_rate": 7.611022144150173e-06, "loss": 0.5683, "step": 3284 }, { "epoch": 1.24, "learning_rate": 7.604631237504912e-06, "loss": 0.5475, "step": 3285 }, { "epoch": 1.24, "learning_rate": 7.598241368601285e-06, "loss": 0.5554, "step": 3286 }, { "epoch": 1.24, "learning_rate": 7.591852540207572e-06, "loss": 0.5134, "step": 3287 }, { "epoch": 1.24, "learning_rate": 7.5854647550915895e-06, "loss": 0.5325, "step": 3288 }, { "epoch": 1.24, "learning_rate": 7.57907801602071e-06, "loss": 0.5335, "step": 3289 }, { "epoch": 1.24, "learning_rate": 7.572692325761848e-06, "loss": 0.5134, "step": 3290 }, { "epoch": 1.24, "learning_rate": 7.566307687081469e-06, "loss": 0.5915, "step": 3291 }, { "epoch": 1.24, "learning_rate": 7.559924102745573e-06, "loss": 0.5579, "step": 3292 }, { "epoch": 1.24, "learning_rate": 7.553541575519709e-06, "loss": 0.5931, "step": 3293 }, { "epoch": 1.24, "learning_rate": 7.547160108168981e-06, "loss": 0.5601, "step": 3294 }, { "epoch": 1.24, "learning_rate": 7.54077970345801e-06, "loss": 0.5633, "step": 3295 }, { "epoch": 1.24, "learning_rate": 7.534400364150972e-06, "loss": 0.561, "step": 3296 }, { "epoch": 1.24, "learning_rate": 7.5280220930115766e-06, "loss": 0.5737, "step": 3297 }, { "epoch": 1.24, "learning_rate": 7.521644892803075e-06, "loss": 0.4767, "step": 3298 }, { "epoch": 1.24, "learning_rate": 7.5152687662882405e-06, "loss": 0.4994, "step": 3299 }, { "epoch": 1.24, "learning_rate": 7.508893716229405e-06, "loss": 0.5391, "step": 3300 }, { "epoch": 1.24, "learning_rate": 7.502519745388418e-06, "loss": 0.5414, "step": 3301 }, { "epoch": 1.25, "learning_rate": 7.496146856526658e-06, "loss": 0.5656, "step": 3302 }, { "epoch": 1.25, "learning_rate": 7.4897750524050465e-06, "loss": 0.5222, "step": 3303 }, { "epoch": 1.25, "learning_rate": 7.483404335784027e-06, "loss": 0.5708, "step": 3304 }, { "epoch": 1.25, "learning_rate": 7.477034709423574e-06, "loss": 0.5449, "step": 3305 }, { "epoch": 1.25, "learning_rate": 7.470666176083193e-06, "loss": 0.5597, "step": 3306 }, { "epoch": 1.25, "learning_rate": 7.464298738521911e-06, "loss": 0.5751, "step": 3307 }, { "epoch": 1.25, "learning_rate": 7.457932399498285e-06, "loss": 0.5482, "step": 3308 }, { "epoch": 1.25, "learning_rate": 7.451567161770393e-06, "loss": 0.5716, "step": 3309 }, { "epoch": 1.25, "learning_rate": 7.445203028095833e-06, "loss": 0.6046, "step": 3310 }, { "epoch": 1.25, "learning_rate": 7.438840001231731e-06, "loss": 0.5157, "step": 3311 }, { "epoch": 1.25, "learning_rate": 7.432478083934727e-06, "loss": 0.5135, "step": 3312 }, { "epoch": 1.25, "learning_rate": 7.426117278960987e-06, "loss": 0.5655, "step": 3313 }, { "epoch": 1.25, "learning_rate": 7.419757589066195e-06, "loss": 0.5376, "step": 3314 }, { "epoch": 1.25, "learning_rate": 7.413399017005542e-06, "loss": 0.5517, "step": 3315 }, { "epoch": 1.25, "learning_rate": 7.4070415655337505e-06, "loss": 0.5692, "step": 3316 }, { "epoch": 1.25, "learning_rate": 7.400685237405038e-06, "loss": 0.6043, "step": 3317 }, { "epoch": 1.25, "learning_rate": 7.39433003537315e-06, "loss": 0.5079, "step": 3318 }, { "epoch": 1.25, "learning_rate": 7.387975962191347e-06, "loss": 0.528, "step": 3319 }, { "epoch": 1.25, "learning_rate": 7.381623020612385e-06, "loss": 0.4652, "step": 3320 }, { "epoch": 1.25, "learning_rate": 7.375271213388542e-06, "loss": 0.6364, "step": 3321 }, { "epoch": 1.25, "learning_rate": 7.368920543271601e-06, "loss": 0.5405, "step": 3322 }, { "epoch": 1.25, "learning_rate": 7.362571013012851e-06, "loss": 0.497, "step": 3323 }, { "epoch": 1.25, "learning_rate": 7.356222625363087e-06, "loss": 0.5817, "step": 3324 }, { "epoch": 1.25, "learning_rate": 7.349875383072614e-06, "loss": 0.5779, "step": 3325 }, { "epoch": 1.25, "learning_rate": 7.343529288891239e-06, "loss": 0.5405, "step": 3326 }, { "epoch": 1.25, "learning_rate": 7.337184345568264e-06, "loss": 0.5753, "step": 3327 }, { "epoch": 1.25, "learning_rate": 7.3308405558525006e-06, "loss": 0.5464, "step": 3328 }, { "epoch": 1.26, "learning_rate": 7.32449792249226e-06, "loss": 0.5481, "step": 3329 }, { "epoch": 1.26, "learning_rate": 7.318156448235345e-06, "loss": 0.5436, "step": 3330 }, { "epoch": 1.26, "learning_rate": 7.3118161358290706e-06, "loss": 0.5627, "step": 3331 }, { "epoch": 1.26, "learning_rate": 7.3054769880202345e-06, "loss": 0.5519, "step": 3332 }, { "epoch": 1.26, "learning_rate": 7.299139007555137e-06, "loss": 0.5829, "step": 3333 }, { "epoch": 1.26, "learning_rate": 7.292802197179571e-06, "loss": 0.522, "step": 3334 }, { "epoch": 1.26, "learning_rate": 7.28646655963882e-06, "loss": 0.5231, "step": 3335 }, { "epoch": 1.26, "learning_rate": 7.280132097677664e-06, "loss": 0.5069, "step": 3336 }, { "epoch": 1.26, "learning_rate": 7.2737988140403694e-06, "loss": 0.555, "step": 3337 }, { "epoch": 1.26, "learning_rate": 7.2674667114706966e-06, "loss": 0.5744, "step": 3338 }, { "epoch": 1.26, "learning_rate": 7.26113579271189e-06, "loss": 0.5641, "step": 3339 }, { "epoch": 1.26, "learning_rate": 7.254806060506685e-06, "loss": 0.5774, "step": 3340 }, { "epoch": 1.26, "learning_rate": 7.248477517597301e-06, "loss": 0.5187, "step": 3341 }, { "epoch": 1.26, "learning_rate": 7.242150166725439e-06, "loss": 0.6055, "step": 3342 }, { "epoch": 1.26, "learning_rate": 7.235824010632284e-06, "loss": 0.5305, "step": 3343 }, { "epoch": 1.26, "learning_rate": 7.2294990520585185e-06, "loss": 0.5617, "step": 3344 }, { "epoch": 1.26, "learning_rate": 7.223175293744283e-06, "loss": 0.5811, "step": 3345 }, { "epoch": 1.26, "learning_rate": 7.216852738429212e-06, "loss": 0.6351, "step": 3346 }, { "epoch": 1.26, "learning_rate": 7.2105313888524165e-06, "loss": 0.5271, "step": 3347 }, { "epoch": 1.26, "learning_rate": 7.204211247752484e-06, "loss": 0.5447, "step": 3348 }, { "epoch": 1.26, "learning_rate": 7.1978923178674745e-06, "loss": 0.5065, "step": 3349 }, { "epoch": 1.26, "learning_rate": 7.191574601934934e-06, "loss": 0.5515, "step": 3350 }, { "epoch": 1.26, "learning_rate": 7.185258102691872e-06, "loss": 0.5664, "step": 3351 }, { "epoch": 1.26, "learning_rate": 7.178942822874781e-06, "loss": 0.5404, "step": 3352 }, { "epoch": 1.26, "learning_rate": 7.17262876521961e-06, "loss": 0.5184, "step": 3353 }, { "epoch": 1.26, "learning_rate": 7.166315932461794e-06, "loss": 0.5737, "step": 3354 }, { "epoch": 1.27, "learning_rate": 7.160004327336227e-06, "loss": 0.5228, "step": 3355 }, { "epoch": 1.27, "learning_rate": 7.153693952577277e-06, "loss": 0.55, "step": 3356 }, { "epoch": 1.27, "learning_rate": 7.14738481091878e-06, "loss": 0.5872, "step": 3357 }, { "epoch": 1.27, "learning_rate": 7.141076905094033e-06, "loss": 0.5246, "step": 3358 }, { "epoch": 1.27, "learning_rate": 7.134770237835801e-06, "loss": 0.5016, "step": 3359 }, { "epoch": 1.27, "learning_rate": 7.128464811876306e-06, "loss": 0.5503, "step": 3360 }, { "epoch": 1.27, "learning_rate": 7.1221606299472424e-06, "loss": 0.6333, "step": 3361 }, { "epoch": 1.27, "learning_rate": 7.115857694779756e-06, "loss": 0.5094, "step": 3362 }, { "epoch": 1.27, "learning_rate": 7.109556009104462e-06, "loss": 0.5081, "step": 3363 }, { "epoch": 1.27, "learning_rate": 7.103255575651426e-06, "loss": 0.5549, "step": 3364 }, { "epoch": 1.27, "learning_rate": 7.096956397150174e-06, "loss": 0.5835, "step": 3365 }, { "epoch": 1.27, "learning_rate": 7.090658476329695e-06, "loss": 0.5711, "step": 3366 }, { "epoch": 1.27, "learning_rate": 7.084361815918417e-06, "loss": 0.5859, "step": 3367 }, { "epoch": 1.27, "learning_rate": 7.078066418644234e-06, "loss": 0.5815, "step": 3368 }, { "epoch": 1.27, "learning_rate": 7.071772287234497e-06, "loss": 0.5737, "step": 3369 }, { "epoch": 1.27, "learning_rate": 7.065479424415998e-06, "loss": 0.5497, "step": 3370 }, { "epoch": 1.27, "learning_rate": 7.059187832914982e-06, "loss": 0.6002, "step": 3371 }, { "epoch": 1.27, "learning_rate": 7.0528975154571465e-06, "loss": 0.5416, "step": 3372 }, { "epoch": 1.27, "learning_rate": 7.046608474767636e-06, "loss": 0.5277, "step": 3373 }, { "epoch": 1.27, "learning_rate": 7.040320713571043e-06, "loss": 0.548, "step": 3374 }, { "epoch": 1.27, "learning_rate": 7.0340342345913935e-06, "loss": 0.5198, "step": 3375 }, { "epoch": 1.27, "learning_rate": 7.02774904055218e-06, "loss": 0.5451, "step": 3376 }, { "epoch": 1.27, "learning_rate": 7.0214651341763255e-06, "loss": 0.5521, "step": 3377 }, { "epoch": 1.27, "learning_rate": 7.015182518186191e-06, "loss": 0.5514, "step": 3378 }, { "epoch": 1.27, "learning_rate": 7.008901195303586e-06, "loss": 0.5964, "step": 3379 }, { "epoch": 1.27, "learning_rate": 7.002621168249759e-06, "loss": 0.5457, "step": 3380 }, { "epoch": 1.27, "learning_rate": 6.996342439745391e-06, "loss": 0.5516, "step": 3381 }, { "epoch": 1.28, "learning_rate": 6.9900650125106126e-06, "loss": 0.4999, "step": 3382 }, { "epoch": 1.28, "learning_rate": 6.98378888926498e-06, "loss": 0.5452, "step": 3383 }, { "epoch": 1.28, "learning_rate": 6.977514072727487e-06, "loss": 0.5523, "step": 3384 }, { "epoch": 1.28, "learning_rate": 6.971240565616562e-06, "loss": 0.5387, "step": 3385 }, { "epoch": 1.28, "learning_rate": 6.964968370650065e-06, "loss": 0.5716, "step": 3386 }, { "epoch": 1.28, "learning_rate": 6.958697490545288e-06, "loss": 0.5668, "step": 3387 }, { "epoch": 1.28, "learning_rate": 6.952427928018957e-06, "loss": 0.5576, "step": 3388 }, { "epoch": 1.28, "learning_rate": 6.946159685787223e-06, "loss": 0.5426, "step": 3389 }, { "epoch": 1.28, "learning_rate": 6.939892766565666e-06, "loss": 0.5689, "step": 3390 }, { "epoch": 1.28, "learning_rate": 6.933627173069294e-06, "loss": 0.545, "step": 3391 }, { "epoch": 1.28, "learning_rate": 6.927362908012541e-06, "loss": 0.464, "step": 3392 }, { "epoch": 1.28, "learning_rate": 6.921099974109255e-06, "loss": 0.5582, "step": 3393 }, { "epoch": 1.28, "learning_rate": 6.9148383740727296e-06, "loss": 0.5434, "step": 3394 }, { "epoch": 1.28, "learning_rate": 6.90857811061566e-06, "loss": 0.5332, "step": 3395 }, { "epoch": 1.28, "learning_rate": 6.902319186450171e-06, "loss": 0.5652, "step": 3396 }, { "epoch": 1.28, "learning_rate": 6.896061604287805e-06, "loss": 0.5044, "step": 3397 }, { "epoch": 1.28, "learning_rate": 6.8898053668395256e-06, "loss": 0.532, "step": 3398 }, { "epoch": 1.28, "learning_rate": 6.883550476815714e-06, "loss": 0.5491, "step": 3399 }, { "epoch": 1.28, "learning_rate": 6.877296936926157e-06, "loss": 0.5743, "step": 3400 }, { "epoch": 1.28, "learning_rate": 6.871044749880074e-06, "loss": 0.5529, "step": 3401 }, { "epoch": 1.28, "learning_rate": 6.864793918386092e-06, "loss": 0.5611, "step": 3402 }, { "epoch": 1.28, "learning_rate": 6.85854444515224e-06, "loss": 0.5444, "step": 3403 }, { "epoch": 1.28, "learning_rate": 6.8522963328859705e-06, "loss": 0.5361, "step": 3404 }, { "epoch": 1.28, "learning_rate": 6.846049584294143e-06, "loss": 0.5645, "step": 3405 }, { "epoch": 1.28, "learning_rate": 6.839804202083024e-06, "loss": 0.4991, "step": 3406 }, { "epoch": 1.28, "learning_rate": 6.833560188958295e-06, "loss": 0.6463, "step": 3407 }, { "epoch": 1.29, "learning_rate": 6.827317547625037e-06, "loss": 0.6031, "step": 3408 }, { "epoch": 1.29, "learning_rate": 6.821076280787738e-06, "loss": 0.5088, "step": 3409 }, { "epoch": 1.29, "learning_rate": 6.814836391150292e-06, "loss": 0.5703, "step": 3410 }, { "epoch": 1.29, "learning_rate": 6.808597881415997e-06, "loss": 0.5221, "step": 3411 }, { "epoch": 1.29, "learning_rate": 6.802360754287548e-06, "loss": 0.5809, "step": 3412 }, { "epoch": 1.29, "learning_rate": 6.796125012467052e-06, "loss": 0.5933, "step": 3413 }, { "epoch": 1.29, "learning_rate": 6.789890658656004e-06, "loss": 0.5503, "step": 3414 }, { "epoch": 1.29, "learning_rate": 6.783657695555307e-06, "loss": 0.5459, "step": 3415 }, { "epoch": 1.29, "learning_rate": 6.777426125865254e-06, "loss": 0.5586, "step": 3416 }, { "epoch": 1.29, "learning_rate": 6.771195952285541e-06, "loss": 0.5455, "step": 3417 }, { "epoch": 1.29, "learning_rate": 6.764967177515251e-06, "loss": 0.5733, "step": 3418 }, { "epoch": 1.29, "learning_rate": 6.758739804252864e-06, "loss": 0.5482, "step": 3419 }, { "epoch": 1.29, "learning_rate": 6.752513835196265e-06, "loss": 0.5705, "step": 3420 }, { "epoch": 1.29, "learning_rate": 6.746289273042712e-06, "loss": 0.5893, "step": 3421 }, { "epoch": 1.29, "learning_rate": 6.740066120488864e-06, "loss": 0.6386, "step": 3422 }, { "epoch": 1.29, "learning_rate": 6.733844380230765e-06, "loss": 0.5274, "step": 3423 }, { "epoch": 1.29, "learning_rate": 6.727624054963855e-06, "loss": 0.5752, "step": 3424 }, { "epoch": 1.29, "learning_rate": 6.721405147382944e-06, "loss": 0.5714, "step": 3425 }, { "epoch": 1.29, "learning_rate": 6.715187660182252e-06, "loss": 0.5528, "step": 3426 }, { "epoch": 1.29, "learning_rate": 6.708971596055365e-06, "loss": 0.5347, "step": 3427 }, { "epoch": 1.29, "learning_rate": 6.7027569576952565e-06, "loss": 0.5336, "step": 3428 }, { "epoch": 1.29, "learning_rate": 6.696543747794286e-06, "loss": 0.5448, "step": 3429 }, { "epoch": 1.29, "learning_rate": 6.690331969044192e-06, "loss": 0.5304, "step": 3430 }, { "epoch": 1.29, "learning_rate": 6.684121624136089e-06, "loss": 0.4937, "step": 3431 }, { "epoch": 1.29, "learning_rate": 6.677912715760482e-06, "loss": 0.5689, "step": 3432 }, { "epoch": 1.29, "learning_rate": 6.671705246607242e-06, "loss": 0.4724, "step": 3433 }, { "epoch": 1.29, "learning_rate": 6.66549921936562e-06, "loss": 0.5559, "step": 3434 }, { "epoch": 1.3, "learning_rate": 6.659294636724246e-06, "loss": 0.5814, "step": 3435 }, { "epoch": 1.3, "learning_rate": 6.653091501371118e-06, "loss": 0.5789, "step": 3436 }, { "epoch": 1.3, "learning_rate": 6.646889815993607e-06, "loss": 0.5281, "step": 3437 }, { "epoch": 1.3, "learning_rate": 6.640689583278465e-06, "loss": 0.5036, "step": 3438 }, { "epoch": 1.3, "learning_rate": 6.634490805911808e-06, "loss": 0.5826, "step": 3439 }, { "epoch": 1.3, "learning_rate": 6.628293486579118e-06, "loss": 0.5383, "step": 3440 }, { "epoch": 1.3, "learning_rate": 6.622097627965254e-06, "loss": 0.5309, "step": 3441 }, { "epoch": 1.3, "learning_rate": 6.615903232754436e-06, "loss": 0.5479, "step": 3442 }, { "epoch": 1.3, "learning_rate": 6.609710303630249e-06, "loss": 0.5549, "step": 3443 }, { "epoch": 1.3, "learning_rate": 6.6035188432756425e-06, "loss": 0.5728, "step": 3444 }, { "epoch": 1.3, "learning_rate": 6.597328854372944e-06, "loss": 0.5401, "step": 3445 }, { "epoch": 1.3, "learning_rate": 6.591140339603823e-06, "loss": 0.4818, "step": 3446 }, { "epoch": 1.3, "learning_rate": 6.584953301649324e-06, "loss": 0.5642, "step": 3447 }, { "epoch": 1.3, "learning_rate": 6.578767743189842e-06, "loss": 0.5717, "step": 3448 }, { "epoch": 1.3, "learning_rate": 6.572583666905141e-06, "loss": 0.5976, "step": 3449 }, { "epoch": 1.3, "learning_rate": 6.566401075474336e-06, "loss": 0.5856, "step": 3450 }, { "epoch": 1.3, "learning_rate": 6.560219971575901e-06, "loss": 0.5512, "step": 3451 }, { "epoch": 1.3, "learning_rate": 6.5540403578876685e-06, "loss": 0.4866, "step": 3452 }, { "epoch": 1.3, "learning_rate": 6.547862237086817e-06, "loss": 0.5032, "step": 3453 }, { "epoch": 1.3, "learning_rate": 6.5416856118498874e-06, "loss": 0.5694, "step": 3454 }, { "epoch": 1.3, "learning_rate": 6.535510484852767e-06, "loss": 0.5077, "step": 3455 }, { "epoch": 1.3, "learning_rate": 6.529336858770694e-06, "loss": 0.5413, "step": 3456 }, { "epoch": 1.3, "learning_rate": 6.523164736278263e-06, "loss": 0.526, "step": 3457 }, { "epoch": 1.3, "learning_rate": 6.5169941200494094e-06, "loss": 0.5317, "step": 3458 }, { "epoch": 1.3, "learning_rate": 6.5108250127574195e-06, "loss": 0.6235, "step": 3459 }, { "epoch": 1.3, "learning_rate": 6.5046574170749274e-06, "loss": 0.4901, "step": 3460 }, { "epoch": 1.31, "learning_rate": 6.498491335673906e-06, "loss": 0.594, "step": 3461 }, { "epoch": 1.31, "learning_rate": 6.492326771225681e-06, "loss": 0.4988, "step": 3462 }, { "epoch": 1.31, "learning_rate": 6.486163726400911e-06, "loss": 0.5125, "step": 3463 }, { "epoch": 1.31, "learning_rate": 6.480002203869607e-06, "loss": 0.561, "step": 3464 }, { "epoch": 1.31, "learning_rate": 6.473842206301113e-06, "loss": 0.5279, "step": 3465 }, { "epoch": 1.31, "learning_rate": 6.467683736364115e-06, "loss": 0.6031, "step": 3466 }, { "epoch": 1.31, "learning_rate": 6.461526796726639e-06, "loss": 0.5558, "step": 3467 }, { "epoch": 1.31, "learning_rate": 6.455371390056041e-06, "loss": 0.5892, "step": 3468 }, { "epoch": 1.31, "learning_rate": 6.449217519019016e-06, "loss": 0.5454, "step": 3469 }, { "epoch": 1.31, "learning_rate": 6.443065186281605e-06, "loss": 0.5167, "step": 3470 }, { "epoch": 1.31, "learning_rate": 6.436914394509166e-06, "loss": 0.5108, "step": 3471 }, { "epoch": 1.31, "learning_rate": 6.430765146366397e-06, "loss": 0.4859, "step": 3472 }, { "epoch": 1.31, "learning_rate": 6.424617444517327e-06, "loss": 0.6049, "step": 3473 }, { "epoch": 1.31, "learning_rate": 6.418471291625314e-06, "loss": 0.5657, "step": 3474 }, { "epoch": 1.31, "learning_rate": 6.4123266903530455e-06, "loss": 0.5369, "step": 3475 }, { "epoch": 1.31, "learning_rate": 6.40618364336254e-06, "loss": 0.5028, "step": 3476 }, { "epoch": 1.31, "learning_rate": 6.400042153315136e-06, "loss": 0.5449, "step": 3477 }, { "epoch": 1.31, "learning_rate": 6.393902222871507e-06, "loss": 0.5266, "step": 3478 }, { "epoch": 1.31, "learning_rate": 6.387763854691635e-06, "loss": 0.5863, "step": 3479 }, { "epoch": 1.31, "learning_rate": 6.381627051434842e-06, "loss": 0.5646, "step": 3480 }, { "epoch": 1.31, "learning_rate": 6.375491815759765e-06, "loss": 0.5358, "step": 3481 }, { "epoch": 1.31, "learning_rate": 6.369358150324356e-06, "loss": 0.5977, "step": 3482 }, { "epoch": 1.31, "learning_rate": 6.3632260577859e-06, "loss": 0.5425, "step": 3483 }, { "epoch": 1.31, "learning_rate": 6.357095540800991e-06, "loss": 0.5558, "step": 3484 }, { "epoch": 1.31, "learning_rate": 6.3509666020255436e-06, "loss": 0.5734, "step": 3485 }, { "epoch": 1.31, "learning_rate": 6.344839244114784e-06, "loss": 0.585, "step": 3486 }, { "epoch": 1.31, "learning_rate": 6.338713469723261e-06, "loss": 0.5696, "step": 3487 }, { "epoch": 1.32, "learning_rate": 6.332589281504829e-06, "loss": 0.5231, "step": 3488 }, { "epoch": 1.32, "learning_rate": 6.326466682112666e-06, "loss": 0.5867, "step": 3489 }, { "epoch": 1.32, "learning_rate": 6.320345674199253e-06, "loss": 0.5506, "step": 3490 }, { "epoch": 1.32, "learning_rate": 6.314226260416383e-06, "loss": 0.5581, "step": 3491 }, { "epoch": 1.32, "learning_rate": 6.308108443415164e-06, "loss": 0.576, "step": 3492 }, { "epoch": 1.32, "learning_rate": 6.301992225846001e-06, "loss": 0.568, "step": 3493 }, { "epoch": 1.32, "learning_rate": 6.295877610358613e-06, "loss": 0.4911, "step": 3494 }, { "epoch": 1.32, "learning_rate": 6.289764599602033e-06, "loss": 0.5354, "step": 3495 }, { "epoch": 1.32, "learning_rate": 6.283653196224583e-06, "loss": 0.5379, "step": 3496 }, { "epoch": 1.32, "learning_rate": 6.277543402873898e-06, "loss": 0.5235, "step": 3497 }, { "epoch": 1.32, "learning_rate": 6.2714352221969155e-06, "loss": 0.5318, "step": 3498 }, { "epoch": 1.32, "learning_rate": 6.265328656839871e-06, "loss": 0.5492, "step": 3499 }, { "epoch": 1.32, "learning_rate": 6.259223709448298e-06, "loss": 0.4962, "step": 3500 }, { "epoch": 1.32, "learning_rate": 6.253120382667039e-06, "loss": 0.5585, "step": 3501 }, { "epoch": 1.32, "learning_rate": 6.2470186791402265e-06, "loss": 0.5514, "step": 3502 }, { "epoch": 1.32, "learning_rate": 6.240918601511292e-06, "loss": 0.5275, "step": 3503 }, { "epoch": 1.32, "learning_rate": 6.234820152422958e-06, "loss": 0.5505, "step": 3504 }, { "epoch": 1.32, "learning_rate": 6.228723334517248e-06, "loss": 0.5668, "step": 3505 }, { "epoch": 1.32, "learning_rate": 6.222628150435477e-06, "loss": 0.498, "step": 3506 }, { "epoch": 1.32, "learning_rate": 6.2165346028182496e-06, "loss": 0.5612, "step": 3507 }, { "epoch": 1.32, "learning_rate": 6.2104426943054654e-06, "loss": 0.5282, "step": 3508 }, { "epoch": 1.32, "learning_rate": 6.204352427536311e-06, "loss": 0.537, "step": 3509 }, { "epoch": 1.32, "learning_rate": 6.198263805149265e-06, "loss": 0.535, "step": 3510 }, { "epoch": 1.32, "learning_rate": 6.192176829782086e-06, "loss": 0.4884, "step": 3511 }, { "epoch": 1.32, "learning_rate": 6.186091504071827e-06, "loss": 0.5269, "step": 3512 }, { "epoch": 1.32, "learning_rate": 6.180007830654822e-06, "loss": 0.565, "step": 3513 }, { "epoch": 1.33, "learning_rate": 6.173925812166695e-06, "loss": 0.511, "step": 3514 }, { "epoch": 1.33, "learning_rate": 6.167845451242345e-06, "loss": 0.5796, "step": 3515 }, { "epoch": 1.33, "learning_rate": 6.161766750515957e-06, "loss": 0.5432, "step": 3516 }, { "epoch": 1.33, "learning_rate": 6.155689712620998e-06, "loss": 0.5843, "step": 3517 }, { "epoch": 1.33, "learning_rate": 6.149614340190215e-06, "loss": 0.5782, "step": 3518 }, { "epoch": 1.33, "learning_rate": 6.1435406358556225e-06, "loss": 0.5643, "step": 3519 }, { "epoch": 1.33, "learning_rate": 6.137468602248533e-06, "loss": 0.5393, "step": 3520 }, { "epoch": 1.33, "learning_rate": 6.131398241999518e-06, "loss": 0.5663, "step": 3521 }, { "epoch": 1.33, "learning_rate": 6.125329557738427e-06, "loss": 0.5336, "step": 3522 }, { "epoch": 1.33, "learning_rate": 6.119262552094391e-06, "loss": 0.5215, "step": 3523 }, { "epoch": 1.33, "learning_rate": 6.113197227695807e-06, "loss": 0.5544, "step": 3524 }, { "epoch": 1.33, "learning_rate": 6.1071335871703454e-06, "loss": 0.5371, "step": 3525 }, { "epoch": 1.33, "learning_rate": 6.1010716331449416e-06, "loss": 0.5405, "step": 3526 }, { "epoch": 1.33, "learning_rate": 6.095011368245815e-06, "loss": 0.4818, "step": 3527 }, { "epoch": 1.33, "learning_rate": 6.088952795098442e-06, "loss": 0.5204, "step": 3528 }, { "epoch": 1.33, "learning_rate": 6.082895916327564e-06, "loss": 0.5075, "step": 3529 }, { "epoch": 1.33, "learning_rate": 6.076840734557196e-06, "loss": 0.5133, "step": 3530 }, { "epoch": 1.33, "learning_rate": 6.070787252410612e-06, "loss": 0.5219, "step": 3531 }, { "epoch": 1.33, "learning_rate": 6.064735472510352e-06, "loss": 0.5505, "step": 3532 }, { "epoch": 1.33, "learning_rate": 6.058685397478223e-06, "loss": 0.5406, "step": 3533 }, { "epoch": 1.33, "learning_rate": 6.052637029935284e-06, "loss": 0.5232, "step": 3534 }, { "epoch": 1.33, "learning_rate": 6.046590372501866e-06, "loss": 0.5041, "step": 3535 }, { "epoch": 1.33, "learning_rate": 6.040545427797544e-06, "loss": 0.5537, "step": 3536 }, { "epoch": 1.33, "learning_rate": 6.034502198441161e-06, "loss": 0.5375, "step": 3537 }, { "epoch": 1.33, "learning_rate": 6.028460687050818e-06, "loss": 0.6066, "step": 3538 }, { "epoch": 1.33, "learning_rate": 6.022420896243868e-06, "loss": 0.528, "step": 3539 }, { "epoch": 1.33, "learning_rate": 6.016382828636922e-06, "loss": 0.5773, "step": 3540 }, { "epoch": 1.34, "learning_rate": 6.010346486845837e-06, "loss": 0.5348, "step": 3541 }, { "epoch": 1.34, "learning_rate": 6.004311873485732e-06, "loss": 0.5797, "step": 3542 }, { "epoch": 1.34, "learning_rate": 5.9982789911709714e-06, "loss": 0.5486, "step": 3543 }, { "epoch": 1.34, "learning_rate": 5.9922478425151685e-06, "loss": 0.5229, "step": 3544 }, { "epoch": 1.34, "learning_rate": 5.986218430131186e-06, "loss": 0.5342, "step": 3545 }, { "epoch": 1.34, "learning_rate": 5.9801907566311444e-06, "loss": 0.5495, "step": 3546 }, { "epoch": 1.34, "learning_rate": 5.974164824626396e-06, "loss": 0.5612, "step": 3547 }, { "epoch": 1.34, "learning_rate": 5.968140636727549e-06, "loss": 0.5406, "step": 3548 }, { "epoch": 1.34, "learning_rate": 5.962118195544448e-06, "loss": 0.5129, "step": 3549 }, { "epoch": 1.34, "learning_rate": 5.956097503686191e-06, "loss": 0.5113, "step": 3550 }, { "epoch": 1.34, "learning_rate": 5.9500785637611026e-06, "loss": 0.5896, "step": 3551 }, { "epoch": 1.34, "learning_rate": 5.9440613783767686e-06, "loss": 0.5377, "step": 3552 }, { "epoch": 1.34, "learning_rate": 5.938045950140003e-06, "loss": 0.5727, "step": 3553 }, { "epoch": 1.34, "learning_rate": 5.932032281656854e-06, "loss": 0.519, "step": 3554 }, { "epoch": 1.34, "learning_rate": 5.926020375532617e-06, "loss": 0.5647, "step": 3555 }, { "epoch": 1.34, "learning_rate": 5.92001023437182e-06, "loss": 0.5102, "step": 3556 }, { "epoch": 1.34, "learning_rate": 5.914001860778223e-06, "loss": 0.589, "step": 3557 }, { "epoch": 1.34, "learning_rate": 5.907995257354831e-06, "loss": 0.5691, "step": 3558 }, { "epoch": 1.34, "learning_rate": 5.901990426703868e-06, "loss": 0.5439, "step": 3559 }, { "epoch": 1.34, "learning_rate": 5.895987371426799e-06, "loss": 0.5575, "step": 3560 }, { "epoch": 1.34, "learning_rate": 5.889986094124322e-06, "loss": 0.511, "step": 3561 }, { "epoch": 1.34, "learning_rate": 5.8839865973963524e-06, "loss": 0.485, "step": 3562 }, { "epoch": 1.34, "learning_rate": 5.877988883842046e-06, "loss": 0.53, "step": 3563 }, { "epoch": 1.34, "learning_rate": 5.8719929560597846e-06, "loss": 0.5142, "step": 3564 }, { "epoch": 1.34, "learning_rate": 5.8659988166471715e-06, "loss": 0.5251, "step": 3565 }, { "epoch": 1.34, "learning_rate": 5.860006468201038e-06, "loss": 0.5359, "step": 3566 }, { "epoch": 1.35, "learning_rate": 5.854015913317439e-06, "loss": 0.5866, "step": 3567 }, { "epoch": 1.35, "learning_rate": 5.8480271545916565e-06, "loss": 0.5315, "step": 3568 }, { "epoch": 1.35, "learning_rate": 5.842040194618185e-06, "loss": 0.5605, "step": 3569 }, { "epoch": 1.35, "learning_rate": 5.836055035990744e-06, "loss": 0.5546, "step": 3570 }, { "epoch": 1.35, "learning_rate": 5.830071681302281e-06, "loss": 0.5262, "step": 3571 }, { "epoch": 1.35, "learning_rate": 5.824090133144951e-06, "loss": 0.5364, "step": 3572 }, { "epoch": 1.35, "learning_rate": 5.818110394110133e-06, "loss": 0.5184, "step": 3573 }, { "epoch": 1.35, "learning_rate": 5.8121324667884145e-06, "loss": 0.5418, "step": 3574 }, { "epoch": 1.35, "learning_rate": 5.806156353769606e-06, "loss": 0.5427, "step": 3575 }, { "epoch": 1.35, "learning_rate": 5.800182057642722e-06, "loss": 0.5035, "step": 3576 }, { "epoch": 1.35, "learning_rate": 5.79420958099601e-06, "loss": 0.5283, "step": 3577 }, { "epoch": 1.35, "learning_rate": 5.788238926416908e-06, "loss": 0.5143, "step": 3578 }, { "epoch": 1.35, "learning_rate": 5.782270096492075e-06, "loss": 0.5405, "step": 3579 }, { "epoch": 1.35, "learning_rate": 5.776303093807376e-06, "loss": 0.5456, "step": 3580 }, { "epoch": 1.35, "learning_rate": 5.770337920947891e-06, "loss": 0.517, "step": 3581 }, { "epoch": 1.35, "learning_rate": 5.764374580497888e-06, "loss": 0.5283, "step": 3582 }, { "epoch": 1.35, "learning_rate": 5.758413075040869e-06, "loss": 0.543, "step": 3583 }, { "epoch": 1.35, "learning_rate": 5.752453407159521e-06, "loss": 0.5129, "step": 3584 }, { "epoch": 1.35, "learning_rate": 5.746495579435744e-06, "loss": 0.5445, "step": 3585 }, { "epoch": 1.35, "learning_rate": 5.740539594450637e-06, "loss": 0.5158, "step": 3586 }, { "epoch": 1.35, "learning_rate": 5.7345854547845e-06, "loss": 0.5908, "step": 3587 }, { "epoch": 1.35, "learning_rate": 5.728633163016839e-06, "loss": 0.5646, "step": 3588 }, { "epoch": 1.35, "learning_rate": 5.722682721726345e-06, "loss": 0.5535, "step": 3589 }, { "epoch": 1.35, "learning_rate": 5.71673413349093e-06, "loss": 0.5384, "step": 3590 }, { "epoch": 1.35, "learning_rate": 5.710787400887685e-06, "loss": 0.4963, "step": 3591 }, { "epoch": 1.35, "learning_rate": 5.704842526492904e-06, "loss": 0.5862, "step": 3592 }, { "epoch": 1.35, "learning_rate": 5.698899512882077e-06, "loss": 0.5281, "step": 3593 }, { "epoch": 1.36, "learning_rate": 5.692958362629883e-06, "loss": 0.537, "step": 3594 }, { "epoch": 1.36, "learning_rate": 5.687019078310199e-06, "loss": 0.5803, "step": 3595 }, { "epoch": 1.36, "learning_rate": 5.681081662496089e-06, "loss": 0.511, "step": 3596 }, { "epoch": 1.36, "learning_rate": 5.675146117759811e-06, "loss": 0.5135, "step": 3597 }, { "epoch": 1.36, "learning_rate": 5.669212446672811e-06, "loss": 0.5403, "step": 3598 }, { "epoch": 1.36, "learning_rate": 5.6632806518057255e-06, "loss": 0.5023, "step": 3599 }, { "epoch": 1.36, "learning_rate": 5.657350735728374e-06, "loss": 0.5652, "step": 3600 }, { "epoch": 1.36, "learning_rate": 5.6514227010097645e-06, "loss": 0.5856, "step": 3601 }, { "epoch": 1.36, "learning_rate": 5.645496550218089e-06, "loss": 0.5988, "step": 3602 }, { "epoch": 1.36, "learning_rate": 5.639572285920726e-06, "loss": 0.5359, "step": 3603 }, { "epoch": 1.36, "learning_rate": 5.633649910684232e-06, "loss": 0.5575, "step": 3604 }, { "epoch": 1.36, "learning_rate": 5.627729427074351e-06, "loss": 0.6103, "step": 3605 }, { "epoch": 1.36, "learning_rate": 5.621810837656001e-06, "loss": 0.5274, "step": 3606 }, { "epoch": 1.36, "learning_rate": 5.615894144993286e-06, "loss": 0.5312, "step": 3607 }, { "epoch": 1.36, "learning_rate": 5.6099793516494776e-06, "loss": 0.574, "step": 3608 }, { "epoch": 1.36, "learning_rate": 5.604066460187045e-06, "loss": 0.5159, "step": 3609 }, { "epoch": 1.36, "learning_rate": 5.5981554731676115e-06, "loss": 0.5328, "step": 3610 }, { "epoch": 1.36, "learning_rate": 5.592246393151984e-06, "loss": 0.5436, "step": 3611 }, { "epoch": 1.36, "learning_rate": 5.586339222700146e-06, "loss": 0.5019, "step": 3612 }, { "epoch": 1.36, "learning_rate": 5.580433964371248e-06, "loss": 0.5273, "step": 3613 }, { "epoch": 1.36, "learning_rate": 5.574530620723616e-06, "loss": 0.5296, "step": 3614 }, { "epoch": 1.36, "learning_rate": 5.56862919431475e-06, "loss": 0.5485, "step": 3615 }, { "epoch": 1.36, "learning_rate": 5.562729687701317e-06, "loss": 0.5656, "step": 3616 }, { "epoch": 1.36, "learning_rate": 5.556832103439142e-06, "loss": 0.5462, "step": 3617 }, { "epoch": 1.36, "learning_rate": 5.550936444083228e-06, "loss": 0.5845, "step": 3618 }, { "epoch": 1.36, "learning_rate": 5.545042712187745e-06, "loss": 0.4891, "step": 3619 }, { "epoch": 1.37, "learning_rate": 5.539150910306019e-06, "loss": 0.5715, "step": 3620 }, { "epoch": 1.37, "learning_rate": 5.533261040990554e-06, "loss": 0.5547, "step": 3621 }, { "epoch": 1.37, "learning_rate": 5.527373106793003e-06, "loss": 0.6119, "step": 3622 }, { "epoch": 1.37, "learning_rate": 5.521487110264187e-06, "loss": 0.5176, "step": 3623 }, { "epoch": 1.37, "learning_rate": 5.5156030539540904e-06, "loss": 0.4843, "step": 3624 }, { "epoch": 1.37, "learning_rate": 5.5097209404118445e-06, "loss": 0.5511, "step": 3625 }, { "epoch": 1.37, "learning_rate": 5.503840772185749e-06, "loss": 0.5448, "step": 3626 }, { "epoch": 1.37, "learning_rate": 5.497962551823266e-06, "loss": 0.5742, "step": 3627 }, { "epoch": 1.37, "learning_rate": 5.492086281871005e-06, "loss": 0.5599, "step": 3628 }, { "epoch": 1.37, "learning_rate": 5.486211964874732e-06, "loss": 0.514, "step": 3629 }, { "epoch": 1.37, "learning_rate": 5.480339603379365e-06, "loss": 0.5336, "step": 3630 }, { "epoch": 1.37, "learning_rate": 5.474469199928985e-06, "loss": 0.524, "step": 3631 }, { "epoch": 1.37, "learning_rate": 5.468600757066808e-06, "loss": 0.6077, "step": 3632 }, { "epoch": 1.37, "learning_rate": 5.462734277335209e-06, "loss": 0.5916, "step": 3633 }, { "epoch": 1.37, "learning_rate": 5.456869763275722e-06, "loss": 0.6044, "step": 3634 }, { "epoch": 1.37, "learning_rate": 5.4510072174290165e-06, "loss": 0.5472, "step": 3635 }, { "epoch": 1.37, "learning_rate": 5.445146642334913e-06, "loss": 0.5442, "step": 3636 }, { "epoch": 1.37, "learning_rate": 5.43928804053238e-06, "loss": 0.5627, "step": 3637 }, { "epoch": 1.37, "learning_rate": 5.433431414559528e-06, "loss": 0.54, "step": 3638 }, { "epoch": 1.37, "learning_rate": 5.427576766953615e-06, "loss": 0.5467, "step": 3639 }, { "epoch": 1.37, "learning_rate": 5.42172410025104e-06, "loss": 0.5872, "step": 3640 }, { "epoch": 1.37, "learning_rate": 5.415873416987345e-06, "loss": 0.5767, "step": 3641 }, { "epoch": 1.37, "learning_rate": 5.410024719697209e-06, "loss": 0.5291, "step": 3642 }, { "epoch": 1.37, "learning_rate": 5.404178010914456e-06, "loss": 0.5213, "step": 3643 }, { "epoch": 1.37, "learning_rate": 5.398333293172044e-06, "loss": 0.5306, "step": 3644 }, { "epoch": 1.37, "learning_rate": 5.392490569002072e-06, "loss": 0.5126, "step": 3645 }, { "epoch": 1.37, "learning_rate": 5.386649840935774e-06, "loss": 0.5111, "step": 3646 }, { "epoch": 1.38, "learning_rate": 5.380811111503517e-06, "loss": 0.5456, "step": 3647 }, { "epoch": 1.38, "learning_rate": 5.374974383234805e-06, "loss": 0.5499, "step": 3648 }, { "epoch": 1.38, "learning_rate": 5.3691396586582755e-06, "loss": 0.568, "step": 3649 }, { "epoch": 1.38, "learning_rate": 5.363306940301694e-06, "loss": 0.5664, "step": 3650 }, { "epoch": 1.38, "learning_rate": 5.35747623069196e-06, "loss": 0.5481, "step": 3651 }, { "epoch": 1.38, "learning_rate": 5.351647532355106e-06, "loss": 0.5851, "step": 3652 }, { "epoch": 1.38, "learning_rate": 5.345820847816284e-06, "loss": 0.5557, "step": 3653 }, { "epoch": 1.38, "learning_rate": 5.33999617959978e-06, "loss": 0.5087, "step": 3654 }, { "epoch": 1.38, "learning_rate": 5.334173530229009e-06, "loss": 0.5351, "step": 3655 }, { "epoch": 1.38, "learning_rate": 5.3283529022265035e-06, "loss": 0.5552, "step": 3656 }, { "epoch": 1.38, "learning_rate": 5.322534298113926e-06, "loss": 0.5441, "step": 3657 }, { "epoch": 1.38, "learning_rate": 5.316717720412059e-06, "loss": 0.4881, "step": 3658 }, { "epoch": 1.38, "learning_rate": 5.310903171640817e-06, "loss": 0.5007, "step": 3659 }, { "epoch": 1.38, "learning_rate": 5.3050906543192175e-06, "loss": 0.5255, "step": 3660 }, { "epoch": 1.38, "learning_rate": 5.299280170965411e-06, "loss": 0.6093, "step": 3661 }, { "epoch": 1.38, "learning_rate": 5.293471724096664e-06, "loss": 0.5627, "step": 3662 }, { "epoch": 1.38, "learning_rate": 5.28766531622936e-06, "loss": 0.5576, "step": 3663 }, { "epoch": 1.38, "learning_rate": 5.2818609498789965e-06, "loss": 0.5408, "step": 3664 }, { "epoch": 1.38, "learning_rate": 5.2760586275601955e-06, "loss": 0.5462, "step": 3665 }, { "epoch": 1.38, "learning_rate": 5.270258351786686e-06, "loss": 0.5343, "step": 3666 }, { "epoch": 1.38, "learning_rate": 5.264460125071312e-06, "loss": 0.6007, "step": 3667 }, { "epoch": 1.38, "learning_rate": 5.258663949926026e-06, "loss": 0.5471, "step": 3668 }, { "epoch": 1.38, "learning_rate": 5.252869828861897e-06, "loss": 0.5052, "step": 3669 }, { "epoch": 1.38, "learning_rate": 5.247077764389099e-06, "loss": 0.5895, "step": 3670 }, { "epoch": 1.38, "learning_rate": 5.2412877590169265e-06, "loss": 0.5368, "step": 3671 }, { "epoch": 1.38, "learning_rate": 5.23549981525377e-06, "loss": 0.5789, "step": 3672 }, { "epoch": 1.38, "learning_rate": 5.2297139356071315e-06, "loss": 0.5366, "step": 3673 }, { "epoch": 1.39, "learning_rate": 5.22393012258362e-06, "loss": 0.5168, "step": 3674 }, { "epoch": 1.39, "learning_rate": 5.218148378688941e-06, "loss": 0.5418, "step": 3675 }, { "epoch": 1.39, "learning_rate": 5.212368706427913e-06, "loss": 0.5394, "step": 3676 }, { "epoch": 1.39, "learning_rate": 5.20659110830445e-06, "loss": 0.5442, "step": 3677 }, { "epoch": 1.39, "learning_rate": 5.2008155868215794e-06, "loss": 0.5333, "step": 3678 }, { "epoch": 1.39, "learning_rate": 5.195042144481416e-06, "loss": 0.5173, "step": 3679 }, { "epoch": 1.39, "learning_rate": 5.189270783785181e-06, "loss": 0.5804, "step": 3680 }, { "epoch": 1.39, "learning_rate": 5.183501507233192e-06, "loss": 0.5764, "step": 3681 }, { "epoch": 1.39, "learning_rate": 5.177734317324859e-06, "loss": 0.5713, "step": 3682 }, { "epoch": 1.39, "learning_rate": 5.171969216558688e-06, "loss": 0.503, "step": 3683 }, { "epoch": 1.39, "learning_rate": 5.166206207432294e-06, "loss": 0.5569, "step": 3684 }, { "epoch": 1.39, "learning_rate": 5.160445292442373e-06, "loss": 0.5201, "step": 3685 }, { "epoch": 1.39, "learning_rate": 5.154686474084716e-06, "loss": 0.5124, "step": 3686 }, { "epoch": 1.39, "learning_rate": 5.148929754854206e-06, "loss": 0.5508, "step": 3687 }, { "epoch": 1.39, "learning_rate": 5.1431751372448155e-06, "loss": 0.5164, "step": 3688 }, { "epoch": 1.39, "learning_rate": 5.13742262374961e-06, "loss": 0.5514, "step": 3689 }, { "epoch": 1.39, "learning_rate": 5.1316722168607425e-06, "loss": 0.5121, "step": 3690 }, { "epoch": 1.39, "learning_rate": 5.125923919069449e-06, "loss": 0.5981, "step": 3691 }, { "epoch": 1.39, "learning_rate": 5.120177732866056e-06, "loss": 0.5914, "step": 3692 }, { "epoch": 1.39, "learning_rate": 5.114433660739976e-06, "loss": 0.5566, "step": 3693 }, { "epoch": 1.39, "learning_rate": 5.108691705179701e-06, "loss": 0.5784, "step": 3694 }, { "epoch": 1.39, "learning_rate": 5.102951868672812e-06, "loss": 0.5346, "step": 3695 }, { "epoch": 1.39, "learning_rate": 5.097214153705967e-06, "loss": 0.5124, "step": 3696 }, { "epoch": 1.39, "learning_rate": 5.0914785627649066e-06, "loss": 0.5851, "step": 3697 }, { "epoch": 1.39, "learning_rate": 5.085745098334451e-06, "loss": 0.5326, "step": 3698 }, { "epoch": 1.39, "learning_rate": 5.0800137628985014e-06, "loss": 0.5357, "step": 3699 }, { "epoch": 1.4, "learning_rate": 5.0742845589400346e-06, "loss": 0.5387, "step": 3700 }, { "epoch": 1.4, "learning_rate": 5.068557488941102e-06, "loss": 0.5273, "step": 3701 }, { "epoch": 1.4, "learning_rate": 5.062832555382834e-06, "loss": 0.5627, "step": 3702 }, { "epoch": 1.4, "learning_rate": 5.057109760745433e-06, "loss": 0.5434, "step": 3703 }, { "epoch": 1.4, "learning_rate": 5.0513891075081774e-06, "loss": 0.5901, "step": 3704 }, { "epoch": 1.4, "learning_rate": 5.045670598149417e-06, "loss": 0.5528, "step": 3705 }, { "epoch": 1.4, "learning_rate": 5.039954235146569e-06, "loss": 0.571, "step": 3706 }, { "epoch": 1.4, "learning_rate": 5.034240020976127e-06, "loss": 0.4848, "step": 3707 }, { "epoch": 1.4, "learning_rate": 5.028527958113645e-06, "loss": 0.5356, "step": 3708 }, { "epoch": 1.4, "learning_rate": 5.022818049033762e-06, "loss": 0.5606, "step": 3709 }, { "epoch": 1.4, "learning_rate": 5.0171102962101614e-06, "loss": 0.5452, "step": 3710 }, { "epoch": 1.4, "learning_rate": 5.011404702115608e-06, "loss": 0.5399, "step": 3711 }, { "epoch": 1.4, "learning_rate": 5.005701269221926e-06, "loss": 0.5283, "step": 3712 }, { "epoch": 1.4, "learning_rate": 5.000000000000003e-06, "loss": 0.5164, "step": 3713 }, { "epoch": 1.4, "learning_rate": 4.994300896919792e-06, "loss": 0.5311, "step": 3714 }, { "epoch": 1.4, "learning_rate": 4.988603962450303e-06, "loss": 0.5343, "step": 3715 }, { "epoch": 1.4, "learning_rate": 4.982909199059614e-06, "loss": 0.5365, "step": 3716 }, { "epoch": 1.4, "learning_rate": 4.977216609214863e-06, "loss": 0.526, "step": 3717 }, { "epoch": 1.4, "learning_rate": 4.971526195382228e-06, "loss": 0.5535, "step": 3718 }, { "epoch": 1.4, "learning_rate": 4.965837960026965e-06, "loss": 0.5293, "step": 3719 }, { "epoch": 1.4, "learning_rate": 4.960151905613377e-06, "loss": 0.4998, "step": 3720 }, { "epoch": 1.4, "learning_rate": 4.954468034604821e-06, "loss": 0.5789, "step": 3721 }, { "epoch": 1.4, "learning_rate": 4.948786349463719e-06, "loss": 0.5398, "step": 3722 }, { "epoch": 1.4, "learning_rate": 4.9431068526515335e-06, "loss": 0.5148, "step": 3723 }, { "epoch": 1.4, "learning_rate": 4.937429546628787e-06, "loss": 0.5354, "step": 3724 }, { "epoch": 1.4, "learning_rate": 4.931754433855043e-06, "loss": 0.5454, "step": 3725 }, { "epoch": 1.4, "learning_rate": 4.926081516788923e-06, "loss": 0.5462, "step": 3726 }, { "epoch": 1.41, "learning_rate": 4.920410797888094e-06, "loss": 0.4891, "step": 3727 }, { "epoch": 1.41, "learning_rate": 4.914742279609278e-06, "loss": 0.4903, "step": 3728 }, { "epoch": 1.41, "learning_rate": 4.909075964408235e-06, "loss": 0.532, "step": 3729 }, { "epoch": 1.41, "learning_rate": 4.903411854739773e-06, "loss": 0.5249, "step": 3730 }, { "epoch": 1.41, "learning_rate": 4.897749953057745e-06, "loss": 0.53, "step": 3731 }, { "epoch": 1.41, "learning_rate": 4.89209026181505e-06, "loss": 0.5172, "step": 3732 }, { "epoch": 1.41, "learning_rate": 4.886432783463616e-06, "loss": 0.5243, "step": 3733 }, { "epoch": 1.41, "learning_rate": 4.8807775204544346e-06, "loss": 0.6436, "step": 3734 }, { "epoch": 1.41, "learning_rate": 4.875124475237523e-06, "loss": 0.5709, "step": 3735 }, { "epoch": 1.41, "learning_rate": 4.86947365026194e-06, "loss": 0.5829, "step": 3736 }, { "epoch": 1.41, "learning_rate": 4.863825047975782e-06, "loss": 0.5974, "step": 3737 }, { "epoch": 1.41, "learning_rate": 4.858178670826184e-06, "loss": 0.5451, "step": 3738 }, { "epoch": 1.41, "learning_rate": 4.852534521259322e-06, "loss": 0.4954, "step": 3739 }, { "epoch": 1.41, "learning_rate": 4.846892601720389e-06, "loss": 0.5346, "step": 3740 }, { "epoch": 1.41, "learning_rate": 4.841252914653635e-06, "loss": 0.5444, "step": 3741 }, { "epoch": 1.41, "learning_rate": 4.83561546250233e-06, "loss": 0.5335, "step": 3742 }, { "epoch": 1.41, "learning_rate": 4.829980247708776e-06, "loss": 0.4909, "step": 3743 }, { "epoch": 1.41, "learning_rate": 4.8243472727143114e-06, "loss": 0.5534, "step": 3744 }, { "epoch": 1.41, "learning_rate": 4.818716539959297e-06, "loss": 0.5366, "step": 3745 }, { "epoch": 1.41, "learning_rate": 4.813088051883128e-06, "loss": 0.5216, "step": 3746 }, { "epoch": 1.41, "learning_rate": 4.807461810924223e-06, "loss": 0.6082, "step": 3747 }, { "epoch": 1.41, "learning_rate": 4.801837819520031e-06, "loss": 0.5519, "step": 3748 }, { "epoch": 1.41, "learning_rate": 4.796216080107025e-06, "loss": 0.5472, "step": 3749 }, { "epoch": 1.41, "learning_rate": 4.790596595120699e-06, "loss": 0.5287, "step": 3750 }, { "epoch": 1.41, "learning_rate": 4.784979366995574e-06, "loss": 0.5275, "step": 3751 }, { "epoch": 1.41, "learning_rate": 4.779364398165195e-06, "loss": 0.561, "step": 3752 }, { "epoch": 1.42, "learning_rate": 4.773751691062123e-06, "loss": 0.5692, "step": 3753 }, { "epoch": 1.42, "learning_rate": 4.7681412481179416e-06, "loss": 0.6153, "step": 3754 }, { "epoch": 1.42, "learning_rate": 4.762533071763255e-06, "loss": 0.5328, "step": 3755 }, { "epoch": 1.42, "learning_rate": 4.756927164427685e-06, "loss": 0.5564, "step": 3756 }, { "epoch": 1.42, "learning_rate": 4.751323528539867e-06, "loss": 0.5578, "step": 3757 }, { "epoch": 1.42, "learning_rate": 4.745722166527458e-06, "loss": 0.5851, "step": 3758 }, { "epoch": 1.42, "learning_rate": 4.7401230808171196e-06, "loss": 0.5118, "step": 3759 }, { "epoch": 1.42, "learning_rate": 4.734526273834548e-06, "loss": 0.5723, "step": 3760 }, { "epoch": 1.42, "learning_rate": 4.728931748004426e-06, "loss": 0.5614, "step": 3761 }, { "epoch": 1.42, "learning_rate": 4.723339505750466e-06, "loss": 0.5282, "step": 3762 }, { "epoch": 1.42, "learning_rate": 4.717749549495385e-06, "loss": 0.5212, "step": 3763 }, { "epoch": 1.42, "learning_rate": 4.712161881660911e-06, "loss": 0.5028, "step": 3764 }, { "epoch": 1.42, "learning_rate": 4.706576504667775e-06, "loss": 0.5301, "step": 3765 }, { "epoch": 1.42, "learning_rate": 4.700993420935729e-06, "loss": 0.5692, "step": 3766 }, { "epoch": 1.42, "learning_rate": 4.695412632883524e-06, "loss": 0.5196, "step": 3767 }, { "epoch": 1.42, "learning_rate": 4.689834142928905e-06, "loss": 0.5455, "step": 3768 }, { "epoch": 1.42, "learning_rate": 4.68425795348864e-06, "loss": 0.5588, "step": 3769 }, { "epoch": 1.42, "learning_rate": 4.678684066978489e-06, "loss": 0.5494, "step": 3770 }, { "epoch": 1.42, "learning_rate": 4.673112485813216e-06, "loss": 0.5503, "step": 3771 }, { "epoch": 1.42, "learning_rate": 4.667543212406593e-06, "loss": 0.5739, "step": 3772 }, { "epoch": 1.42, "learning_rate": 4.661976249171386e-06, "loss": 0.5082, "step": 3773 }, { "epoch": 1.42, "learning_rate": 4.656411598519359e-06, "loss": 0.5506, "step": 3774 }, { "epoch": 1.42, "learning_rate": 4.650849262861282e-06, "loss": 0.5868, "step": 3775 }, { "epoch": 1.42, "learning_rate": 4.645289244606907e-06, "loss": 0.63, "step": 3776 }, { "epoch": 1.42, "learning_rate": 4.6397315461649975e-06, "loss": 0.5632, "step": 3777 }, { "epoch": 1.42, "learning_rate": 4.634176169943301e-06, "loss": 0.565, "step": 3778 }, { "epoch": 1.42, "learning_rate": 4.62862311834857e-06, "loss": 0.4845, "step": 3779 }, { "epoch": 1.43, "learning_rate": 4.623072393786543e-06, "loss": 0.5124, "step": 3780 }, { "epoch": 1.43, "learning_rate": 4.617523998661949e-06, "loss": 0.5286, "step": 3781 }, { "epoch": 1.43, "learning_rate": 4.6119779353785155e-06, "loss": 0.5597, "step": 3782 }, { "epoch": 1.43, "learning_rate": 4.606434206338945e-06, "loss": 0.5512, "step": 3783 }, { "epoch": 1.43, "learning_rate": 4.6008928139449385e-06, "loss": 0.5304, "step": 3784 }, { "epoch": 1.43, "learning_rate": 4.595353760597193e-06, "loss": 0.5711, "step": 3785 }, { "epoch": 1.43, "learning_rate": 4.589817048695377e-06, "loss": 0.5543, "step": 3786 }, { "epoch": 1.43, "learning_rate": 4.584282680638155e-06, "loss": 0.5302, "step": 3787 }, { "epoch": 1.43, "learning_rate": 4.578750658823169e-06, "loss": 0.5252, "step": 3788 }, { "epoch": 1.43, "learning_rate": 4.573220985647049e-06, "loss": 0.5411, "step": 3789 }, { "epoch": 1.43, "learning_rate": 4.567693663505405e-06, "loss": 0.537, "step": 3790 }, { "epoch": 1.43, "learning_rate": 4.56216869479283e-06, "loss": 0.5751, "step": 3791 }, { "epoch": 1.43, "learning_rate": 4.556646081902898e-06, "loss": 0.5262, "step": 3792 }, { "epoch": 1.43, "learning_rate": 4.551125827228161e-06, "loss": 0.5916, "step": 3793 }, { "epoch": 1.43, "learning_rate": 4.5456079331601485e-06, "loss": 0.5104, "step": 3794 }, { "epoch": 1.43, "learning_rate": 4.54009240208937e-06, "loss": 0.5384, "step": 3795 }, { "epoch": 1.43, "learning_rate": 4.53457923640531e-06, "loss": 0.5397, "step": 3796 }, { "epoch": 1.43, "learning_rate": 4.529068438496427e-06, "loss": 0.6118, "step": 3797 }, { "epoch": 1.43, "learning_rate": 4.523560010750157e-06, "loss": 0.5542, "step": 3798 }, { "epoch": 1.43, "learning_rate": 4.518053955552903e-06, "loss": 0.537, "step": 3799 }, { "epoch": 1.43, "learning_rate": 4.512550275290049e-06, "loss": 0.5381, "step": 3800 }, { "epoch": 1.43, "learning_rate": 4.507048972345941e-06, "loss": 0.5324, "step": 3801 }, { "epoch": 1.43, "learning_rate": 4.501550049103902e-06, "loss": 0.5202, "step": 3802 }, { "epoch": 1.43, "learning_rate": 4.4960535079462206e-06, "loss": 0.5427, "step": 3803 }, { "epoch": 1.43, "learning_rate": 4.490559351254156e-06, "loss": 0.5404, "step": 3804 }, { "epoch": 1.43, "learning_rate": 4.485067581407929e-06, "loss": 0.6072, "step": 3805 }, { "epoch": 1.44, "learning_rate": 4.479578200786732e-06, "loss": 0.5807, "step": 3806 }, { "epoch": 1.44, "learning_rate": 4.474091211768721e-06, "loss": 0.5154, "step": 3807 }, { "epoch": 1.44, "learning_rate": 4.468606616731014e-06, "loss": 0.5309, "step": 3808 }, { "epoch": 1.44, "learning_rate": 4.4631244180496894e-06, "loss": 0.5831, "step": 3809 }, { "epoch": 1.44, "learning_rate": 4.457644618099801e-06, "loss": 0.5527, "step": 3810 }, { "epoch": 1.44, "learning_rate": 4.452167219255346e-06, "loss": 0.5025, "step": 3811 }, { "epoch": 1.44, "learning_rate": 4.4466922238892884e-06, "loss": 0.5327, "step": 3812 }, { "epoch": 1.44, "learning_rate": 4.441219634373555e-06, "loss": 0.5069, "step": 3813 }, { "epoch": 1.44, "learning_rate": 4.435749453079023e-06, "loss": 0.6138, "step": 3814 }, { "epoch": 1.44, "learning_rate": 4.430281682375529e-06, "loss": 0.5434, "step": 3815 }, { "epoch": 1.44, "learning_rate": 4.424816324631873e-06, "loss": 0.4978, "step": 3816 }, { "epoch": 1.44, "learning_rate": 4.419353382215799e-06, "loss": 0.4983, "step": 3817 }, { "epoch": 1.44, "learning_rate": 4.413892857494011e-06, "loss": 0.548, "step": 3818 }, { "epoch": 1.44, "learning_rate": 4.408434752832158e-06, "loss": 0.5444, "step": 3819 }, { "epoch": 1.44, "learning_rate": 4.402979070594848e-06, "loss": 0.5164, "step": 3820 }, { "epoch": 1.44, "learning_rate": 4.3975258131456365e-06, "loss": 0.5717, "step": 3821 }, { "epoch": 1.44, "learning_rate": 4.392074982847028e-06, "loss": 0.5544, "step": 3822 }, { "epoch": 1.44, "learning_rate": 4.386626582060481e-06, "loss": 0.5574, "step": 3823 }, { "epoch": 1.44, "learning_rate": 4.381180613146396e-06, "loss": 0.5336, "step": 3824 }, { "epoch": 1.44, "learning_rate": 4.375737078464122e-06, "loss": 0.5292, "step": 3825 }, { "epoch": 1.44, "learning_rate": 4.370295980371947e-06, "loss": 0.5642, "step": 3826 }, { "epoch": 1.44, "learning_rate": 4.364857321227112e-06, "loss": 0.56, "step": 3827 }, { "epoch": 1.44, "learning_rate": 4.3594211033857965e-06, "loss": 0.5424, "step": 3828 }, { "epoch": 1.44, "learning_rate": 4.35398732920313e-06, "loss": 0.5537, "step": 3829 }, { "epoch": 1.44, "learning_rate": 4.3485560010331725e-06, "loss": 0.5283, "step": 3830 }, { "epoch": 1.44, "learning_rate": 4.3431271212289306e-06, "loss": 0.5448, "step": 3831 }, { "epoch": 1.44, "learning_rate": 4.337700692142349e-06, "loss": 0.5429, "step": 3832 }, { "epoch": 1.45, "learning_rate": 4.332276716124314e-06, "loss": 0.5282, "step": 3833 }, { "epoch": 1.45, "learning_rate": 4.3268551955246355e-06, "loss": 0.5271, "step": 3834 }, { "epoch": 1.45, "learning_rate": 4.321436132692079e-06, "loss": 0.552, "step": 3835 }, { "epoch": 1.45, "learning_rate": 4.316019529974336e-06, "loss": 0.5523, "step": 3836 }, { "epoch": 1.45, "learning_rate": 4.310605389718028e-06, "loss": 0.5606, "step": 3837 }, { "epoch": 1.45, "learning_rate": 4.305193714268716e-06, "loss": 0.5347, "step": 3838 }, { "epoch": 1.45, "learning_rate": 4.299784505970891e-06, "loss": 0.5617, "step": 3839 }, { "epoch": 1.45, "learning_rate": 4.294377767167976e-06, "loss": 0.5874, "step": 3840 }, { "epoch": 1.45, "learning_rate": 4.288973500202324e-06, "loss": 0.5735, "step": 3841 }, { "epoch": 1.45, "learning_rate": 4.283571707415214e-06, "loss": 0.5401, "step": 3842 }, { "epoch": 1.45, "learning_rate": 4.278172391146857e-06, "loss": 0.5638, "step": 3843 }, { "epoch": 1.45, "learning_rate": 4.2727755537363915e-06, "loss": 0.5909, "step": 3844 }, { "epoch": 1.45, "learning_rate": 4.2673811975218785e-06, "loss": 0.5201, "step": 3845 }, { "epoch": 1.45, "learning_rate": 4.2619893248403065e-06, "loss": 0.5466, "step": 3846 }, { "epoch": 1.45, "learning_rate": 4.256599938027588e-06, "loss": 0.5426, "step": 3847 }, { "epoch": 1.45, "learning_rate": 4.251213039418558e-06, "loss": 0.5268, "step": 3848 }, { "epoch": 1.45, "learning_rate": 4.245828631346973e-06, "loss": 0.5492, "step": 3849 }, { "epoch": 1.45, "learning_rate": 4.240446716145512e-06, "loss": 0.5779, "step": 3850 }, { "epoch": 1.45, "learning_rate": 4.235067296145772e-06, "loss": 0.6047, "step": 3851 }, { "epoch": 1.45, "learning_rate": 4.229690373678271e-06, "loss": 0.5398, "step": 3852 }, { "epoch": 1.45, "learning_rate": 4.224315951072445e-06, "loss": 0.545, "step": 3853 }, { "epoch": 1.45, "learning_rate": 4.2189440306566456e-06, "loss": 0.5093, "step": 3854 }, { "epoch": 1.45, "learning_rate": 4.2135746147581405e-06, "loss": 0.5046, "step": 3855 }, { "epoch": 1.45, "learning_rate": 4.208207705703114e-06, "loss": 0.5552, "step": 3856 }, { "epoch": 1.45, "learning_rate": 4.2028433058166606e-06, "loss": 0.5358, "step": 3857 }, { "epoch": 1.45, "learning_rate": 4.197481417422793e-06, "loss": 0.5791, "step": 3858 }, { "epoch": 1.46, "learning_rate": 4.1921220428444285e-06, "loss": 0.5699, "step": 3859 }, { "epoch": 1.46, "learning_rate": 4.186765184403407e-06, "loss": 0.5465, "step": 3860 }, { "epoch": 1.46, "learning_rate": 4.181410844420473e-06, "loss": 0.5505, "step": 3861 }, { "epoch": 1.46, "learning_rate": 4.17605902521527e-06, "loss": 0.548, "step": 3862 }, { "epoch": 1.46, "learning_rate": 4.170709729106362e-06, "loss": 0.5294, "step": 3863 }, { "epoch": 1.46, "learning_rate": 4.165362958411215e-06, "loss": 0.5237, "step": 3864 }, { "epoch": 1.46, "learning_rate": 4.1600187154462e-06, "loss": 0.5485, "step": 3865 }, { "epoch": 1.46, "learning_rate": 4.154677002526594e-06, "loss": 0.539, "step": 3866 }, { "epoch": 1.46, "learning_rate": 4.149337821966583e-06, "loss": 0.5251, "step": 3867 }, { "epoch": 1.46, "learning_rate": 4.144001176079254e-06, "loss": 0.55, "step": 3868 }, { "epoch": 1.46, "learning_rate": 4.1386670671765834e-06, "loss": 0.508, "step": 3869 }, { "epoch": 1.46, "learning_rate": 4.133335497569465e-06, "loss": 0.5248, "step": 3870 }, { "epoch": 1.46, "learning_rate": 4.128006469567682e-06, "loss": 0.5743, "step": 3871 }, { "epoch": 1.46, "learning_rate": 4.1226799854799205e-06, "loss": 0.6131, "step": 3872 }, { "epoch": 1.46, "learning_rate": 4.1173560476137695e-06, "loss": 0.5503, "step": 3873 }, { "epoch": 1.46, "learning_rate": 4.112034658275707e-06, "loss": 0.5108, "step": 3874 }, { "epoch": 1.46, "learning_rate": 4.106715819771109e-06, "loss": 0.5324, "step": 3875 }, { "epoch": 1.46, "learning_rate": 4.10139953440425e-06, "loss": 0.5445, "step": 3876 }, { "epoch": 1.46, "learning_rate": 4.09608580447829e-06, "loss": 0.5626, "step": 3877 }, { "epoch": 1.46, "learning_rate": 4.090774632295287e-06, "loss": 0.5398, "step": 3878 }, { "epoch": 1.46, "learning_rate": 4.085466020156198e-06, "loss": 0.5375, "step": 3879 }, { "epoch": 1.46, "learning_rate": 4.08015997036086e-06, "loss": 0.5742, "step": 3880 }, { "epoch": 1.46, "learning_rate": 4.074856485208006e-06, "loss": 0.5459, "step": 3881 }, { "epoch": 1.46, "learning_rate": 4.069555566995256e-06, "loss": 0.5118, "step": 3882 }, { "epoch": 1.46, "learning_rate": 4.064257218019119e-06, "loss": 0.5044, "step": 3883 }, { "epoch": 1.46, "learning_rate": 4.058961440574984e-06, "loss": 0.5576, "step": 3884 }, { "epoch": 1.46, "learning_rate": 4.053668236957135e-06, "loss": 0.5337, "step": 3885 }, { "epoch": 1.47, "learning_rate": 4.048377609458739e-06, "loss": 0.602, "step": 3886 }, { "epoch": 1.47, "learning_rate": 4.043089560371848e-06, "loss": 0.5316, "step": 3887 }, { "epoch": 1.47, "learning_rate": 4.037804091987391e-06, "loss": 0.5455, "step": 3888 }, { "epoch": 1.47, "learning_rate": 4.032521206595185e-06, "loss": 0.5676, "step": 3889 }, { "epoch": 1.47, "learning_rate": 4.027240906483924e-06, "loss": 0.5805, "step": 3890 }, { "epoch": 1.47, "learning_rate": 4.0219631939411845e-06, "loss": 0.4974, "step": 3891 }, { "epoch": 1.47, "learning_rate": 4.016688071253419e-06, "loss": 0.5657, "step": 3892 }, { "epoch": 1.47, "learning_rate": 4.011415540705965e-06, "loss": 0.5462, "step": 3893 }, { "epoch": 1.47, "learning_rate": 4.006145604583028e-06, "loss": 0.5685, "step": 3894 }, { "epoch": 1.47, "learning_rate": 4.000878265167695e-06, "loss": 0.5127, "step": 3895 }, { "epoch": 1.47, "learning_rate": 3.995613524741927e-06, "loss": 0.5657, "step": 3896 }, { "epoch": 1.47, "learning_rate": 3.9903513855865586e-06, "loss": 0.5334, "step": 3897 }, { "epoch": 1.47, "learning_rate": 3.9850918499812976e-06, "loss": 0.5115, "step": 3898 }, { "epoch": 1.47, "learning_rate": 3.979834920204723e-06, "loss": 0.4936, "step": 3899 }, { "epoch": 1.47, "learning_rate": 3.9745805985342865e-06, "loss": 0.539, "step": 3900 }, { "epoch": 1.47, "learning_rate": 3.96932888724631e-06, "loss": 0.5745, "step": 3901 }, { "epoch": 1.47, "learning_rate": 3.964079788615983e-06, "loss": 0.5751, "step": 3902 }, { "epoch": 1.47, "learning_rate": 3.95883330491736e-06, "loss": 0.5106, "step": 3903 }, { "epoch": 1.47, "learning_rate": 3.9535894384233764e-06, "loss": 0.4976, "step": 3904 }, { "epoch": 1.47, "learning_rate": 3.948348191405815e-06, "loss": 0.4885, "step": 3905 }, { "epoch": 1.47, "learning_rate": 3.9431095661353344e-06, "loss": 0.5385, "step": 3906 }, { "epoch": 1.47, "learning_rate": 3.937873564881457e-06, "loss": 0.5018, "step": 3907 }, { "epoch": 1.47, "learning_rate": 3.932640189912566e-06, "loss": 0.5356, "step": 3908 }, { "epoch": 1.47, "learning_rate": 3.927409443495908e-06, "loss": 0.5321, "step": 3909 }, { "epoch": 1.47, "learning_rate": 3.9221813278975865e-06, "loss": 0.5002, "step": 3910 }, { "epoch": 1.47, "learning_rate": 3.916955845382581e-06, "loss": 0.5468, "step": 3911 }, { "epoch": 1.48, "learning_rate": 3.9117329982147066e-06, "loss": 0.5208, "step": 3912 }, { "epoch": 1.48, "learning_rate": 3.906512788656653e-06, "loss": 0.526, "step": 3913 }, { "epoch": 1.48, "learning_rate": 3.901295218969965e-06, "loss": 0.5189, "step": 3914 }, { "epoch": 1.48, "learning_rate": 3.896080291415039e-06, "loss": 0.5761, "step": 3915 }, { "epoch": 1.48, "learning_rate": 3.890868008251125e-06, "loss": 0.56, "step": 3916 }, { "epoch": 1.48, "learning_rate": 3.885658371736342e-06, "loss": 0.542, "step": 3917 }, { "epoch": 1.48, "learning_rate": 3.880451384127647e-06, "loss": 0.4942, "step": 3918 }, { "epoch": 1.48, "learning_rate": 3.875247047680858e-06, "loss": 0.5689, "step": 3919 }, { "epoch": 1.48, "learning_rate": 3.870045364650634e-06, "loss": 0.5752, "step": 3920 }, { "epoch": 1.48, "learning_rate": 3.8648463372904955e-06, "loss": 0.5198, "step": 3921 }, { "epoch": 1.48, "learning_rate": 3.8596499678528054e-06, "loss": 0.5364, "step": 3922 }, { "epoch": 1.48, "learning_rate": 3.854456258588785e-06, "loss": 0.4935, "step": 3923 }, { "epoch": 1.48, "learning_rate": 3.849265211748491e-06, "loss": 0.5206, "step": 3924 }, { "epoch": 1.48, "learning_rate": 3.844076829580834e-06, "loss": 0.5454, "step": 3925 }, { "epoch": 1.48, "learning_rate": 3.838891114333571e-06, "loss": 0.5172, "step": 3926 }, { "epoch": 1.48, "learning_rate": 3.833708068253295e-06, "loss": 0.588, "step": 3927 }, { "epoch": 1.48, "learning_rate": 3.828527693585451e-06, "loss": 0.5534, "step": 3928 }, { "epoch": 1.48, "learning_rate": 3.82334999257432e-06, "loss": 0.5393, "step": 3929 }, { "epoch": 1.48, "learning_rate": 3.818174967463038e-06, "loss": 0.5205, "step": 3930 }, { "epoch": 1.48, "learning_rate": 3.8130026204935656e-06, "loss": 0.5122, "step": 3931 }, { "epoch": 1.48, "learning_rate": 3.8078329539067125e-06, "loss": 0.5274, "step": 3932 }, { "epoch": 1.48, "learning_rate": 3.8026659699421274e-06, "loss": 0.4805, "step": 3933 }, { "epoch": 1.48, "learning_rate": 3.797501670838288e-06, "loss": 0.586, "step": 3934 }, { "epoch": 1.48, "learning_rate": 3.7923400588325156e-06, "loss": 0.5453, "step": 3935 }, { "epoch": 1.48, "learning_rate": 3.7871811361609724e-06, "loss": 0.5195, "step": 3936 }, { "epoch": 1.48, "learning_rate": 3.7820249050586456e-06, "loss": 0.5865, "step": 3937 }, { "epoch": 1.48, "learning_rate": 3.776871367759364e-06, "loss": 0.5952, "step": 3938 }, { "epoch": 1.49, "learning_rate": 3.7717205264957823e-06, "loss": 0.5363, "step": 3939 }, { "epoch": 1.49, "learning_rate": 3.7665723834993928e-06, "loss": 0.545, "step": 3940 }, { "epoch": 1.49, "learning_rate": 3.7614269410005166e-06, "loss": 0.5753, "step": 3941 }, { "epoch": 1.49, "learning_rate": 3.7562842012283064e-06, "loss": 0.6012, "step": 3942 }, { "epoch": 1.49, "learning_rate": 3.7511441664107406e-06, "loss": 0.5664, "step": 3943 }, { "epoch": 1.49, "learning_rate": 3.7460068387746297e-06, "loss": 0.5109, "step": 3944 }, { "epoch": 1.49, "learning_rate": 3.7408722205456096e-06, "loss": 0.5253, "step": 3945 }, { "epoch": 1.49, "learning_rate": 3.735740313948141e-06, "loss": 0.5338, "step": 3946 }, { "epoch": 1.49, "learning_rate": 3.730611121205513e-06, "loss": 0.5463, "step": 3947 }, { "epoch": 1.49, "learning_rate": 3.7254846445398365e-06, "loss": 0.5456, "step": 3948 }, { "epoch": 1.49, "learning_rate": 3.7203608861720453e-06, "loss": 0.5589, "step": 3949 }, { "epoch": 1.49, "learning_rate": 3.715239848321899e-06, "loss": 0.5179, "step": 3950 }, { "epoch": 1.49, "learning_rate": 3.7101215332079753e-06, "loss": 0.5766, "step": 3951 }, { "epoch": 1.49, "learning_rate": 3.7050059430476725e-06, "loss": 0.5034, "step": 3952 }, { "epoch": 1.49, "learning_rate": 3.699893080057211e-06, "loss": 0.5456, "step": 3953 }, { "epoch": 1.49, "learning_rate": 3.694782946451626e-06, "loss": 0.528, "step": 3954 }, { "epoch": 1.49, "learning_rate": 3.689675544444774e-06, "loss": 0.539, "step": 3955 }, { "epoch": 1.49, "learning_rate": 3.684570876249325e-06, "loss": 0.4968, "step": 3956 }, { "epoch": 1.49, "learning_rate": 3.679468944076767e-06, "loss": 0.591, "step": 3957 }, { "epoch": 1.49, "learning_rate": 3.674369750137401e-06, "loss": 0.5431, "step": 3958 }, { "epoch": 1.49, "learning_rate": 3.669273296640342e-06, "loss": 0.5403, "step": 3959 }, { "epoch": 1.49, "learning_rate": 3.6641795857935157e-06, "loss": 0.5195, "step": 3960 }, { "epoch": 1.49, "learning_rate": 3.6590886198036714e-06, "loss": 0.5695, "step": 3961 }, { "epoch": 1.49, "learning_rate": 3.65400040087635e-06, "loss": 0.5495, "step": 3962 }, { "epoch": 1.49, "learning_rate": 3.6489149312159154e-06, "loss": 0.5667, "step": 3963 }, { "epoch": 1.49, "learning_rate": 3.6438322130255376e-06, "loss": 0.6136, "step": 3964 }, { "epoch": 1.5, "learning_rate": 3.6387522485071934e-06, "loss": 0.5425, "step": 3965 }, { "epoch": 1.5, "learning_rate": 3.633675039861665e-06, "loss": 0.5615, "step": 3966 }, { "epoch": 1.5, "learning_rate": 3.628600589288548e-06, "loss": 0.555, "step": 3967 }, { "epoch": 1.5, "learning_rate": 3.623528898986237e-06, "loss": 0.5549, "step": 3968 }, { "epoch": 1.5, "learning_rate": 3.6184599711519343e-06, "loss": 0.5418, "step": 3969 }, { "epoch": 1.5, "learning_rate": 3.6133938079816367e-06, "loss": 0.5351, "step": 3970 }, { "epoch": 1.5, "learning_rate": 3.6083304116701535e-06, "loss": 0.5748, "step": 3971 }, { "epoch": 1.5, "learning_rate": 3.6032697844110896e-06, "loss": 0.531, "step": 3972 }, { "epoch": 1.5, "learning_rate": 3.598211928396851e-06, "loss": 0.5771, "step": 3973 }, { "epoch": 1.5, "learning_rate": 3.5931568458186495e-06, "loss": 0.5673, "step": 3974 }, { "epoch": 1.5, "learning_rate": 3.588104538866488e-06, "loss": 0.5704, "step": 3975 }, { "epoch": 1.5, "learning_rate": 3.5830550097291715e-06, "loss": 0.5594, "step": 3976 }, { "epoch": 1.5, "learning_rate": 3.5780082605942925e-06, "loss": 0.5438, "step": 3977 }, { "epoch": 1.5, "learning_rate": 3.572964293648249e-06, "loss": 0.5561, "step": 3978 }, { "epoch": 1.5, "learning_rate": 3.567923111076228e-06, "loss": 0.5619, "step": 3979 }, { "epoch": 1.5, "learning_rate": 3.562884715062219e-06, "loss": 0.5641, "step": 3980 }, { "epoch": 1.5, "learning_rate": 3.5578491077889942e-06, "loss": 0.5316, "step": 3981 }, { "epoch": 1.5, "learning_rate": 3.5528162914381215e-06, "loss": 0.5376, "step": 3982 }, { "epoch": 1.5, "learning_rate": 3.547786268189959e-06, "loss": 0.5362, "step": 3983 }, { "epoch": 1.5, "learning_rate": 3.54275904022366e-06, "loss": 0.538, "step": 3984 }, { "epoch": 1.5, "learning_rate": 3.5377346097171516e-06, "loss": 0.5053, "step": 3985 }, { "epoch": 1.5, "learning_rate": 3.532712978847168e-06, "loss": 0.5551, "step": 3986 }, { "epoch": 1.5, "learning_rate": 3.5276941497892212e-06, "loss": 0.5462, "step": 3987 }, { "epoch": 1.5, "learning_rate": 3.52267812471761e-06, "loss": 0.5541, "step": 3988 }, { "epoch": 1.5, "learning_rate": 3.517664905805416e-06, "loss": 0.539, "step": 3989 }, { "epoch": 1.5, "learning_rate": 3.51265449522451e-06, "loss": 0.5215, "step": 3990 }, { "epoch": 1.5, "learning_rate": 3.5076468951455466e-06, "loss": 0.5238, "step": 3991 }, { "epoch": 1.51, "learning_rate": 3.50264210773795e-06, "loss": 0.5305, "step": 3992 }, { "epoch": 1.51, "learning_rate": 3.4976401351699463e-06, "loss": 0.5497, "step": 3993 }, { "epoch": 1.51, "learning_rate": 3.4926409796085283e-06, "loss": 0.54, "step": 3994 }, { "epoch": 1.51, "learning_rate": 3.487644643219471e-06, "loss": 0.5341, "step": 3995 }, { "epoch": 1.51, "learning_rate": 3.4826511281673304e-06, "loss": 0.5567, "step": 3996 }, { "epoch": 1.51, "learning_rate": 3.4776604366154374e-06, "loss": 0.5467, "step": 3997 }, { "epoch": 1.51, "learning_rate": 3.472672570725902e-06, "loss": 0.5631, "step": 3998 }, { "epoch": 1.51, "learning_rate": 3.4676875326596095e-06, "loss": 0.529, "step": 3999 }, { "epoch": 1.51, "learning_rate": 3.4627053245762176e-06, "loss": 0.5205, "step": 4000 }, { "epoch": 1.51, "learning_rate": 3.4577259486341618e-06, "loss": 0.5211, "step": 4001 }, { "epoch": 1.51, "learning_rate": 3.4527494069906485e-06, "loss": 0.5609, "step": 4002 }, { "epoch": 1.51, "learning_rate": 3.447775701801658e-06, "loss": 0.5002, "step": 4003 }, { "epoch": 1.51, "learning_rate": 3.4428048352219368e-06, "loss": 0.4693, "step": 4004 }, { "epoch": 1.51, "learning_rate": 3.4378368094050083e-06, "loss": 0.5035, "step": 4005 }, { "epoch": 1.51, "learning_rate": 3.4328716265031604e-06, "loss": 0.5158, "step": 4006 }, { "epoch": 1.51, "learning_rate": 3.4279092886674527e-06, "loss": 0.55, "step": 4007 }, { "epoch": 1.51, "learning_rate": 3.422949798047709e-06, "loss": 0.539, "step": 4008 }, { "epoch": 1.51, "learning_rate": 3.4179931567925216e-06, "loss": 0.5756, "step": 4009 }, { "epoch": 1.51, "learning_rate": 3.4130393670492477e-06, "loss": 0.5317, "step": 4010 }, { "epoch": 1.51, "learning_rate": 3.408088430964007e-06, "loss": 0.5568, "step": 4011 }, { "epoch": 1.51, "learning_rate": 3.403140350681696e-06, "loss": 0.5412, "step": 4012 }, { "epoch": 1.51, "learning_rate": 3.398195128345951e-06, "loss": 0.5625, "step": 4013 }, { "epoch": 1.51, "learning_rate": 3.3932527660991877e-06, "loss": 0.5032, "step": 4014 }, { "epoch": 1.51, "learning_rate": 3.3883132660825767e-06, "loss": 0.518, "step": 4015 }, { "epoch": 1.51, "learning_rate": 3.3833766304360494e-06, "loss": 0.5293, "step": 4016 }, { "epoch": 1.51, "learning_rate": 3.3784428612982933e-06, "loss": 0.5898, "step": 4017 }, { "epoch": 1.52, "learning_rate": 3.3735119608067635e-06, "loss": 0.5559, "step": 4018 }, { "epoch": 1.52, "learning_rate": 3.3685839310976655e-06, "loss": 0.5128, "step": 4019 }, { "epoch": 1.52, "learning_rate": 3.3636587743059545e-06, "loss": 0.4708, "step": 4020 }, { "epoch": 1.52, "learning_rate": 3.3587364925653534e-06, "loss": 0.5587, "step": 4021 }, { "epoch": 1.52, "learning_rate": 3.353817088008332e-06, "loss": 0.5686, "step": 4022 }, { "epoch": 1.52, "learning_rate": 3.348900562766113e-06, "loss": 0.5076, "step": 4023 }, { "epoch": 1.52, "learning_rate": 3.343986918968681e-06, "loss": 0.5105, "step": 4024 }, { "epoch": 1.52, "learning_rate": 3.3390761587447627e-06, "loss": 0.5645, "step": 4025 }, { "epoch": 1.52, "learning_rate": 3.3341682842218404e-06, "loss": 0.5224, "step": 4026 }, { "epoch": 1.52, "learning_rate": 3.329263297526144e-06, "loss": 0.5281, "step": 4027 }, { "epoch": 1.52, "learning_rate": 3.324361200782649e-06, "loss": 0.5297, "step": 4028 }, { "epoch": 1.52, "learning_rate": 3.319461996115082e-06, "loss": 0.5579, "step": 4029 }, { "epoch": 1.52, "learning_rate": 3.314565685645923e-06, "loss": 0.5731, "step": 4030 }, { "epoch": 1.52, "learning_rate": 3.3096722714963903e-06, "loss": 0.6151, "step": 4031 }, { "epoch": 1.52, "learning_rate": 3.304781755786449e-06, "loss": 0.5084, "step": 4032 }, { "epoch": 1.52, "learning_rate": 3.2998941406348096e-06, "loss": 0.4965, "step": 4033 }, { "epoch": 1.52, "learning_rate": 3.295009428158927e-06, "loss": 0.5828, "step": 4034 }, { "epoch": 1.52, "learning_rate": 3.2901276204749944e-06, "loss": 0.552, "step": 4035 }, { "epoch": 1.52, "learning_rate": 3.2852487196979455e-06, "loss": 0.5126, "step": 4036 }, { "epoch": 1.52, "learning_rate": 3.2803727279414664e-06, "loss": 0.5613, "step": 4037 }, { "epoch": 1.52, "learning_rate": 3.2754996473179714e-06, "loss": 0.5083, "step": 4038 }, { "epoch": 1.52, "learning_rate": 3.2706294799386175e-06, "loss": 0.5546, "step": 4039 }, { "epoch": 1.52, "learning_rate": 3.2657622279132993e-06, "loss": 0.6299, "step": 4040 }, { "epoch": 1.52, "learning_rate": 3.2608978933506474e-06, "loss": 0.5122, "step": 4041 }, { "epoch": 1.52, "learning_rate": 3.2560364783580313e-06, "loss": 0.5675, "step": 4042 }, { "epoch": 1.52, "learning_rate": 3.2511779850415516e-06, "loss": 0.5391, "step": 4043 }, { "epoch": 1.52, "learning_rate": 3.2463224155060457e-06, "loss": 0.5509, "step": 4044 }, { "epoch": 1.53, "learning_rate": 3.2414697718550856e-06, "loss": 0.5116, "step": 4045 }, { "epoch": 1.53, "learning_rate": 3.236620056190972e-06, "loss": 0.5433, "step": 4046 }, { "epoch": 1.53, "learning_rate": 3.231773270614741e-06, "loss": 0.5117, "step": 4047 }, { "epoch": 1.53, "learning_rate": 3.2269294172261555e-06, "loss": 0.5601, "step": 4048 }, { "epoch": 1.53, "learning_rate": 3.222088498123712e-06, "loss": 0.5709, "step": 4049 }, { "epoch": 1.53, "learning_rate": 3.217250515404633e-06, "loss": 0.5651, "step": 4050 }, { "epoch": 1.53, "learning_rate": 3.21241547116487e-06, "loss": 0.5888, "step": 4051 }, { "epoch": 1.53, "learning_rate": 3.207583367499101e-06, "loss": 0.5029, "step": 4052 }, { "epoch": 1.53, "learning_rate": 3.2027542065007288e-06, "loss": 0.567, "step": 4053 }, { "epoch": 1.53, "learning_rate": 3.1979279902618852e-06, "loss": 0.5706, "step": 4054 }, { "epoch": 1.53, "learning_rate": 3.1931047208734233e-06, "loss": 0.5378, "step": 4055 }, { "epoch": 1.53, "learning_rate": 3.1882844004249194e-06, "loss": 0.583, "step": 4056 }, { "epoch": 1.53, "learning_rate": 3.1834670310046735e-06, "loss": 0.5544, "step": 4057 }, { "epoch": 1.53, "learning_rate": 3.1786526146997055e-06, "loss": 0.5277, "step": 4058 }, { "epoch": 1.53, "learning_rate": 3.173841153595758e-06, "loss": 0.522, "step": 4059 }, { "epoch": 1.53, "learning_rate": 3.1690326497772927e-06, "loss": 0.5734, "step": 4060 }, { "epoch": 1.53, "learning_rate": 3.1642271053274855e-06, "loss": 0.5398, "step": 4061 }, { "epoch": 1.53, "learning_rate": 3.1594245223282447e-06, "loss": 0.5445, "step": 4062 }, { "epoch": 1.53, "learning_rate": 3.154624902860176e-06, "loss": 0.4965, "step": 4063 }, { "epoch": 1.53, "learning_rate": 3.149828249002612e-06, "loss": 0.5214, "step": 4064 }, { "epoch": 1.53, "learning_rate": 3.145034562833602e-06, "loss": 0.512, "step": 4065 }, { "epoch": 1.53, "learning_rate": 3.1402438464299044e-06, "loss": 0.5457, "step": 4066 }, { "epoch": 1.53, "learning_rate": 3.1354561018669905e-06, "loss": 0.5669, "step": 4067 }, { "epoch": 1.53, "learning_rate": 3.1306713312190552e-06, "loss": 0.5727, "step": 4068 }, { "epoch": 1.53, "learning_rate": 3.1258895365589902e-06, "loss": 0.5725, "step": 4069 }, { "epoch": 1.53, "learning_rate": 3.121110719958411e-06, "loss": 0.5332, "step": 4070 }, { "epoch": 1.54, "learning_rate": 3.1163348834876285e-06, "loss": 0.5241, "step": 4071 }, { "epoch": 1.54, "learning_rate": 3.1115620292156734e-06, "loss": 0.5901, "step": 4072 }, { "epoch": 1.54, "learning_rate": 3.1067921592102777e-06, "loss": 0.548, "step": 4073 }, { "epoch": 1.54, "learning_rate": 3.102025275537893e-06, "loss": 0.5634, "step": 4074 }, { "epoch": 1.54, "learning_rate": 3.097261380263663e-06, "loss": 0.5542, "step": 4075 }, { "epoch": 1.54, "learning_rate": 3.0925004754514433e-06, "loss": 0.5031, "step": 4076 }, { "epoch": 1.54, "learning_rate": 3.0877425631637946e-06, "loss": 0.5101, "step": 4077 }, { "epoch": 1.54, "learning_rate": 3.0829876454619747e-06, "loss": 0.5427, "step": 4078 }, { "epoch": 1.54, "learning_rate": 3.0782357244059503e-06, "loss": 0.5025, "step": 4079 }, { "epoch": 1.54, "learning_rate": 3.0734868020543872e-06, "loss": 0.5416, "step": 4080 }, { "epoch": 1.54, "learning_rate": 3.068740880464657e-06, "loss": 0.559, "step": 4081 }, { "epoch": 1.54, "learning_rate": 3.0639979616928263e-06, "loss": 0.5369, "step": 4082 }, { "epoch": 1.54, "learning_rate": 3.0592580477936606e-06, "loss": 0.59, "step": 4083 }, { "epoch": 1.54, "learning_rate": 3.0545211408206264e-06, "loss": 0.5014, "step": 4084 }, { "epoch": 1.54, "learning_rate": 3.0497872428258877e-06, "loss": 0.485, "step": 4085 }, { "epoch": 1.54, "learning_rate": 3.0450563558602954e-06, "loss": 0.5339, "step": 4086 }, { "epoch": 1.54, "learning_rate": 3.0403284819734113e-06, "loss": 0.5521, "step": 4087 }, { "epoch": 1.54, "learning_rate": 3.0356036232134823e-06, "loss": 0.5625, "step": 4088 }, { "epoch": 1.54, "learning_rate": 3.0308817816274506e-06, "loss": 0.5561, "step": 4089 }, { "epoch": 1.54, "learning_rate": 3.026162959260953e-06, "loss": 0.5224, "step": 4090 }, { "epoch": 1.54, "learning_rate": 3.021447158158316e-06, "loss": 0.5542, "step": 4091 }, { "epoch": 1.54, "learning_rate": 3.0167343803625594e-06, "loss": 0.5032, "step": 4092 }, { "epoch": 1.54, "learning_rate": 3.012024627915391e-06, "loss": 0.5133, "step": 4093 }, { "epoch": 1.54, "learning_rate": 3.0073179028572087e-06, "loss": 0.5487, "step": 4094 }, { "epoch": 1.54, "learning_rate": 3.0026142072271004e-06, "loss": 0.5394, "step": 4095 }, { "epoch": 1.54, "learning_rate": 2.9979135430628403e-06, "loss": 0.5386, "step": 4096 }, { "epoch": 1.54, "learning_rate": 2.9932159124008877e-06, "loss": 0.5089, "step": 4097 }, { "epoch": 1.55, "learning_rate": 2.9885213172763915e-06, "loss": 0.5344, "step": 4098 }, { "epoch": 1.55, "learning_rate": 2.9838297597231825e-06, "loss": 0.5831, "step": 4099 }, { "epoch": 1.55, "learning_rate": 2.979141241773775e-06, "loss": 0.5387, "step": 4100 }, { "epoch": 1.55, "learning_rate": 2.974455765459371e-06, "loss": 0.5268, "step": 4101 }, { "epoch": 1.55, "learning_rate": 2.969773332809849e-06, "loss": 0.5454, "step": 4102 }, { "epoch": 1.55, "learning_rate": 2.965093945853772e-06, "loss": 0.5656, "step": 4103 }, { "epoch": 1.55, "learning_rate": 2.9604176066183842e-06, "loss": 0.6002, "step": 4104 }, { "epoch": 1.55, "learning_rate": 2.9557443171296063e-06, "loss": 0.5709, "step": 4105 }, { "epoch": 1.55, "learning_rate": 2.951074079412042e-06, "loss": 0.5606, "step": 4106 }, { "epoch": 1.55, "learning_rate": 2.9464068954889692e-06, "loss": 0.4972, "step": 4107 }, { "epoch": 1.55, "learning_rate": 2.941742767382345e-06, "loss": 0.5653, "step": 4108 }, { "epoch": 1.55, "learning_rate": 2.9370816971128013e-06, "loss": 0.5442, "step": 4109 }, { "epoch": 1.55, "learning_rate": 2.9324236866996456e-06, "loss": 0.487, "step": 4110 }, { "epoch": 1.55, "learning_rate": 2.9277687381608575e-06, "loss": 0.4991, "step": 4111 }, { "epoch": 1.55, "learning_rate": 2.9231168535130996e-06, "loss": 0.5663, "step": 4112 }, { "epoch": 1.55, "learning_rate": 2.9184680347716987e-06, "loss": 0.526, "step": 4113 }, { "epoch": 1.55, "learning_rate": 2.9138222839506492e-06, "loss": 0.5635, "step": 4114 }, { "epoch": 1.55, "learning_rate": 2.9091796030626252e-06, "loss": 0.5197, "step": 4115 }, { "epoch": 1.55, "learning_rate": 2.904539994118968e-06, "loss": 0.5208, "step": 4116 }, { "epoch": 1.55, "learning_rate": 2.899903459129687e-06, "loss": 0.5304, "step": 4117 }, { "epoch": 1.55, "learning_rate": 2.8952700001034584e-06, "loss": 0.5817, "step": 4118 }, { "epoch": 1.55, "learning_rate": 2.8906396190476326e-06, "loss": 0.5963, "step": 4119 }, { "epoch": 1.55, "learning_rate": 2.8860123179682244e-06, "loss": 0.5573, "step": 4120 }, { "epoch": 1.55, "learning_rate": 2.8813880988699027e-06, "loss": 0.4822, "step": 4121 }, { "epoch": 1.55, "learning_rate": 2.876766963756015e-06, "loss": 0.5278, "step": 4122 }, { "epoch": 1.55, "learning_rate": 2.8721489146285677e-06, "loss": 0.5593, "step": 4123 }, { "epoch": 1.56, "learning_rate": 2.8675339534882276e-06, "loss": 0.5531, "step": 4124 }, { "epoch": 1.56, "learning_rate": 2.862922082334333e-06, "loss": 0.4849, "step": 4125 }, { "epoch": 1.56, "learning_rate": 2.8583133031648736e-06, "loss": 0.5136, "step": 4126 }, { "epoch": 1.56, "learning_rate": 2.8537076179765035e-06, "loss": 0.5496, "step": 4127 }, { "epoch": 1.56, "learning_rate": 2.8491050287645383e-06, "loss": 0.6178, "step": 4128 }, { "epoch": 1.56, "learning_rate": 2.844505537522946e-06, "loss": 0.5523, "step": 4129 }, { "epoch": 1.56, "learning_rate": 2.839909146244354e-06, "loss": 0.5091, "step": 4130 }, { "epoch": 1.56, "learning_rate": 2.835315856920058e-06, "loss": 0.5374, "step": 4131 }, { "epoch": 1.56, "learning_rate": 2.8307256715399957e-06, "loss": 0.5314, "step": 4132 }, { "epoch": 1.56, "learning_rate": 2.8261385920927676e-06, "loss": 0.5701, "step": 4133 }, { "epoch": 1.56, "learning_rate": 2.821554620565624e-06, "loss": 0.5818, "step": 4134 }, { "epoch": 1.56, "learning_rate": 2.8169737589444767e-06, "loss": 0.5211, "step": 4135 }, { "epoch": 1.56, "learning_rate": 2.8123960092138747e-06, "loss": 0.5635, "step": 4136 }, { "epoch": 1.56, "learning_rate": 2.807821373357038e-06, "loss": 0.5069, "step": 4137 }, { "epoch": 1.56, "learning_rate": 2.8032498533558263e-06, "loss": 0.5548, "step": 4138 }, { "epoch": 1.56, "learning_rate": 2.7986814511907503e-06, "loss": 0.5305, "step": 4139 }, { "epoch": 1.56, "learning_rate": 2.7941161688409714e-06, "loss": 0.5157, "step": 4140 }, { "epoch": 1.56, "learning_rate": 2.789554008284301e-06, "loss": 0.5766, "step": 4141 }, { "epoch": 1.56, "learning_rate": 2.7849949714971936e-06, "loss": 0.5442, "step": 4142 }, { "epoch": 1.56, "learning_rate": 2.780439060454756e-06, "loss": 0.5762, "step": 4143 }, { "epoch": 1.56, "learning_rate": 2.775886277130736e-06, "loss": 0.5515, "step": 4144 }, { "epoch": 1.56, "learning_rate": 2.7713366234975294e-06, "loss": 0.5266, "step": 4145 }, { "epoch": 1.56, "learning_rate": 2.7667901015261747e-06, "loss": 0.528, "step": 4146 }, { "epoch": 1.56, "learning_rate": 2.762246713186354e-06, "loss": 0.5319, "step": 4147 }, { "epoch": 1.56, "learning_rate": 2.7577064604463923e-06, "loss": 0.5177, "step": 4148 }, { "epoch": 1.56, "learning_rate": 2.7531693452732556e-06, "loss": 0.5378, "step": 4149 }, { "epoch": 1.56, "learning_rate": 2.7486353696325508e-06, "loss": 0.5322, "step": 4150 }, { "epoch": 1.57, "learning_rate": 2.7441045354885243e-06, "loss": 0.5615, "step": 4151 }, { "epoch": 1.57, "learning_rate": 2.7395768448040615e-06, "loss": 0.5001, "step": 4152 }, { "epoch": 1.57, "learning_rate": 2.735052299540688e-06, "loss": 0.5476, "step": 4153 }, { "epoch": 1.57, "learning_rate": 2.7305309016585636e-06, "loss": 0.536, "step": 4154 }, { "epoch": 1.57, "learning_rate": 2.7260126531164834e-06, "loss": 0.5491, "step": 4155 }, { "epoch": 1.57, "learning_rate": 2.7214975558718902e-06, "loss": 0.5035, "step": 4156 }, { "epoch": 1.57, "learning_rate": 2.7169856118808414e-06, "loss": 0.5586, "step": 4157 }, { "epoch": 1.57, "learning_rate": 2.7124768230980447e-06, "loss": 0.5744, "step": 4158 }, { "epoch": 1.57, "learning_rate": 2.7079711914768325e-06, "loss": 0.5525, "step": 4159 }, { "epoch": 1.57, "learning_rate": 2.7034687189691745e-06, "loss": 0.4978, "step": 4160 }, { "epoch": 1.57, "learning_rate": 2.698969407525668e-06, "loss": 0.5171, "step": 4161 }, { "epoch": 1.57, "learning_rate": 2.694473259095538e-06, "loss": 0.504, "step": 4162 }, { "epoch": 1.57, "learning_rate": 2.689980275626656e-06, "loss": 0.4872, "step": 4163 }, { "epoch": 1.57, "learning_rate": 2.6854904590654974e-06, "loss": 0.554, "step": 4164 }, { "epoch": 1.57, "learning_rate": 2.681003811357182e-06, "loss": 0.5578, "step": 4165 }, { "epoch": 1.57, "learning_rate": 2.676520334445455e-06, "loss": 0.5198, "step": 4166 }, { "epoch": 1.57, "learning_rate": 2.6720400302726825e-06, "loss": 0.6013, "step": 4167 }, { "epoch": 1.57, "learning_rate": 2.6675629007798566e-06, "loss": 0.5418, "step": 4168 }, { "epoch": 1.57, "learning_rate": 2.6630889479066046e-06, "loss": 0.5885, "step": 4169 }, { "epoch": 1.57, "learning_rate": 2.6586181735911655e-06, "loss": 0.5257, "step": 4170 }, { "epoch": 1.57, "learning_rate": 2.654150579770408e-06, "loss": 0.5092, "step": 4171 }, { "epoch": 1.57, "learning_rate": 2.649686168379816e-06, "loss": 0.5411, "step": 4172 }, { "epoch": 1.57, "learning_rate": 2.6452249413535e-06, "loss": 0.5336, "step": 4173 }, { "epoch": 1.57, "learning_rate": 2.6407669006241887e-06, "loss": 0.5285, "step": 4174 }, { "epoch": 1.57, "learning_rate": 2.6363120481232362e-06, "loss": 0.4683, "step": 4175 }, { "epoch": 1.57, "learning_rate": 2.631860385780609e-06, "loss": 0.5646, "step": 4176 }, { "epoch": 1.58, "learning_rate": 2.6274119155248934e-06, "loss": 0.5414, "step": 4177 }, { "epoch": 1.58, "learning_rate": 2.622966639283295e-06, "loss": 0.5005, "step": 4178 }, { "epoch": 1.58, "learning_rate": 2.618524558981629e-06, "loss": 0.5323, "step": 4179 }, { "epoch": 1.58, "learning_rate": 2.614085676544331e-06, "loss": 0.5776, "step": 4180 }, { "epoch": 1.58, "learning_rate": 2.60964999389445e-06, "loss": 0.5614, "step": 4181 }, { "epoch": 1.58, "learning_rate": 2.605217512953653e-06, "loss": 0.5345, "step": 4182 }, { "epoch": 1.58, "learning_rate": 2.6007882356422165e-06, "loss": 0.5145, "step": 4183 }, { "epoch": 1.58, "learning_rate": 2.596362163879026e-06, "loss": 0.5257, "step": 4184 }, { "epoch": 1.58, "learning_rate": 2.591939299581586e-06, "loss": 0.5184, "step": 4185 }, { "epoch": 1.58, "learning_rate": 2.587519644666001e-06, "loss": 0.4875, "step": 4186 }, { "epoch": 1.58, "learning_rate": 2.5831032010469904e-06, "loss": 0.6552, "step": 4187 }, { "epoch": 1.58, "learning_rate": 2.5786899706378863e-06, "loss": 0.5368, "step": 4188 }, { "epoch": 1.58, "learning_rate": 2.5742799553506258e-06, "loss": 0.5238, "step": 4189 }, { "epoch": 1.58, "learning_rate": 2.5698731570957512e-06, "loss": 0.4925, "step": 4190 }, { "epoch": 1.58, "learning_rate": 2.5654695777824114e-06, "loss": 0.5592, "step": 4191 }, { "epoch": 1.58, "learning_rate": 2.561069219318363e-06, "loss": 0.5567, "step": 4192 }, { "epoch": 1.58, "learning_rate": 2.5566720836099646e-06, "loss": 0.5333, "step": 4193 }, { "epoch": 1.58, "learning_rate": 2.5522781725621814e-06, "loss": 0.556, "step": 4194 }, { "epoch": 1.58, "learning_rate": 2.547887488078581e-06, "loss": 0.5479, "step": 4195 }, { "epoch": 1.58, "learning_rate": 2.5435000320613303e-06, "loss": 0.5838, "step": 4196 }, { "epoch": 1.58, "learning_rate": 2.5391158064111997e-06, "loss": 0.5274, "step": 4197 }, { "epoch": 1.58, "learning_rate": 2.5347348130275608e-06, "loss": 0.5302, "step": 4198 }, { "epoch": 1.58, "learning_rate": 2.5303570538083844e-06, "loss": 0.5496, "step": 4199 }, { "epoch": 1.58, "learning_rate": 2.5259825306502396e-06, "loss": 0.5436, "step": 4200 }, { "epoch": 1.58, "learning_rate": 2.5216112454482945e-06, "loss": 0.5666, "step": 4201 }, { "epoch": 1.58, "learning_rate": 2.517243200096312e-06, "loss": 0.5119, "step": 4202 }, { "epoch": 1.58, "learning_rate": 2.5128783964866544e-06, "loss": 0.5782, "step": 4203 }, { "epoch": 1.59, "learning_rate": 2.5085168365102775e-06, "loss": 0.5305, "step": 4204 }, { "epoch": 1.59, "learning_rate": 2.504158522056733e-06, "loss": 0.5421, "step": 4205 }, { "epoch": 1.59, "learning_rate": 2.499803455014167e-06, "loss": 0.5208, "step": 4206 }, { "epoch": 1.59, "learning_rate": 2.4954516372693182e-06, "loss": 0.5954, "step": 4207 }, { "epoch": 1.59, "learning_rate": 2.4911030707075157e-06, "loss": 0.5282, "step": 4208 }, { "epoch": 1.59, "learning_rate": 2.4867577572126832e-06, "loss": 0.5942, "step": 4209 }, { "epoch": 1.59, "learning_rate": 2.482415698667333e-06, "loss": 0.5556, "step": 4210 }, { "epoch": 1.59, "learning_rate": 2.4780768969525683e-06, "loss": 0.5258, "step": 4211 }, { "epoch": 1.59, "learning_rate": 2.473741353948077e-06, "loss": 0.559, "step": 4212 }, { "epoch": 1.59, "learning_rate": 2.4694090715321493e-06, "loss": 0.5668, "step": 4213 }, { "epoch": 1.59, "learning_rate": 2.465080051581644e-06, "loss": 0.5242, "step": 4214 }, { "epoch": 1.59, "learning_rate": 2.4607542959720174e-06, "loss": 0.5439, "step": 4215 }, { "epoch": 1.59, "learning_rate": 2.4564318065773096e-06, "loss": 0.4613, "step": 4216 }, { "epoch": 1.59, "learning_rate": 2.4521125852701455e-06, "loss": 0.5471, "step": 4217 }, { "epoch": 1.59, "learning_rate": 2.447796633921732e-06, "loss": 0.5155, "step": 4218 }, { "epoch": 1.59, "learning_rate": 2.4434839544018663e-06, "loss": 0.5666, "step": 4219 }, { "epoch": 1.59, "learning_rate": 2.439174548578921e-06, "loss": 0.5734, "step": 4220 }, { "epoch": 1.59, "learning_rate": 2.4348684183198555e-06, "loss": 0.4669, "step": 4221 }, { "epoch": 1.59, "learning_rate": 2.4305655654902017e-06, "loss": 0.5163, "step": 4222 }, { "epoch": 1.59, "learning_rate": 2.426265991954081e-06, "loss": 0.562, "step": 4223 }, { "epoch": 1.59, "learning_rate": 2.4219696995741894e-06, "loss": 0.5787, "step": 4224 }, { "epoch": 1.59, "learning_rate": 2.417676690211801e-06, "loss": 0.5107, "step": 4225 }, { "epoch": 1.59, "learning_rate": 2.4133869657267726e-06, "loss": 0.5358, "step": 4226 }, { "epoch": 1.59, "learning_rate": 2.4091005279775347e-06, "loss": 0.5456, "step": 4227 }, { "epoch": 1.59, "learning_rate": 2.404817378821094e-06, "loss": 0.5196, "step": 4228 }, { "epoch": 1.59, "learning_rate": 2.4005375201130275e-06, "loss": 0.5143, "step": 4229 }, { "epoch": 1.6, "learning_rate": 2.3962609537074944e-06, "loss": 0.5499, "step": 4230 }, { "epoch": 1.6, "learning_rate": 2.3919876814572197e-06, "loss": 0.5496, "step": 4231 }, { "epoch": 1.6, "learning_rate": 2.3877177052135148e-06, "loss": 0.5263, "step": 4232 }, { "epoch": 1.6, "learning_rate": 2.383451026826249e-06, "loss": 0.5021, "step": 4233 }, { "epoch": 1.6, "learning_rate": 2.379187648143869e-06, "loss": 0.5567, "step": 4234 }, { "epoch": 1.6, "learning_rate": 2.3749275710133925e-06, "loss": 0.5994, "step": 4235 }, { "epoch": 1.6, "learning_rate": 2.3706707972804067e-06, "loss": 0.514, "step": 4236 }, { "epoch": 1.6, "learning_rate": 2.366417328789059e-06, "loss": 0.5482, "step": 4237 }, { "epoch": 1.6, "learning_rate": 2.3621671673820813e-06, "loss": 0.4906, "step": 4238 }, { "epoch": 1.6, "learning_rate": 2.35792031490076e-06, "loss": 0.5251, "step": 4239 }, { "epoch": 1.6, "learning_rate": 2.353676773184953e-06, "loss": 0.5103, "step": 4240 }, { "epoch": 1.6, "learning_rate": 2.3494365440730836e-06, "loss": 0.4997, "step": 4241 }, { "epoch": 1.6, "learning_rate": 2.345199629402137e-06, "loss": 0.5417, "step": 4242 }, { "epoch": 1.6, "learning_rate": 2.3409660310076663e-06, "loss": 0.5147, "step": 4243 }, { "epoch": 1.6, "learning_rate": 2.3367357507237866e-06, "loss": 0.6006, "step": 4244 }, { "epoch": 1.6, "learning_rate": 2.3325087903831744e-06, "loss": 0.5339, "step": 4245 }, { "epoch": 1.6, "learning_rate": 2.328285151817068e-06, "loss": 0.5132, "step": 4246 }, { "epoch": 1.6, "learning_rate": 2.324064836855269e-06, "loss": 0.548, "step": 4247 }, { "epoch": 1.6, "learning_rate": 2.3198478473261367e-06, "loss": 0.5297, "step": 4248 }, { "epoch": 1.6, "learning_rate": 2.31563418505659e-06, "loss": 0.5113, "step": 4249 }, { "epoch": 1.6, "learning_rate": 2.311423851872108e-06, "loss": 0.5131, "step": 4250 }, { "epoch": 1.6, "learning_rate": 2.307216849596725e-06, "loss": 0.5695, "step": 4251 }, { "epoch": 1.6, "learning_rate": 2.303013180053034e-06, "loss": 0.5668, "step": 4252 }, { "epoch": 1.6, "learning_rate": 2.2988128450621847e-06, "loss": 0.4804, "step": 4253 }, { "epoch": 1.6, "learning_rate": 2.2946158464438805e-06, "loss": 0.5114, "step": 4254 }, { "epoch": 1.6, "learning_rate": 2.290422186016381e-06, "loss": 0.5674, "step": 4255 }, { "epoch": 1.6, "learning_rate": 2.2862318655964976e-06, "loss": 0.5453, "step": 4256 }, { "epoch": 1.61, "learning_rate": 2.2820448869995958e-06, "loss": 0.5535, "step": 4257 }, { "epoch": 1.61, "learning_rate": 2.2778612520395936e-06, "loss": 0.5856, "step": 4258 }, { "epoch": 1.61, "learning_rate": 2.273680962528961e-06, "loss": 0.5599, "step": 4259 }, { "epoch": 1.61, "learning_rate": 2.2695040202787177e-06, "loss": 0.5853, "step": 4260 }, { "epoch": 1.61, "learning_rate": 2.2653304270984334e-06, "loss": 0.5273, "step": 4261 }, { "epoch": 1.61, "learning_rate": 2.2611601847962237e-06, "loss": 0.524, "step": 4262 }, { "epoch": 1.61, "learning_rate": 2.256993295178761e-06, "loss": 0.5482, "step": 4263 }, { "epoch": 1.61, "learning_rate": 2.252829760051262e-06, "loss": 0.5078, "step": 4264 }, { "epoch": 1.61, "learning_rate": 2.2486695812174798e-06, "loss": 0.5131, "step": 4265 }, { "epoch": 1.61, "learning_rate": 2.2445127604797256e-06, "loss": 0.5901, "step": 4266 }, { "epoch": 1.61, "learning_rate": 2.2403592996388535e-06, "loss": 0.5235, "step": 4267 }, { "epoch": 1.61, "learning_rate": 2.2362092004942583e-06, "loss": 0.4874, "step": 4268 }, { "epoch": 1.61, "learning_rate": 2.2320624648438794e-06, "loss": 0.5563, "step": 4269 }, { "epoch": 1.61, "learning_rate": 2.2279190944842046e-06, "loss": 0.5594, "step": 4270 }, { "epoch": 1.61, "learning_rate": 2.223779091210261e-06, "loss": 0.5182, "step": 4271 }, { "epoch": 1.61, "learning_rate": 2.2196424568156073e-06, "loss": 0.5674, "step": 4272 }, { "epoch": 1.61, "learning_rate": 2.2155091930923545e-06, "loss": 0.5318, "step": 4273 }, { "epoch": 1.61, "learning_rate": 2.2113793018311514e-06, "loss": 0.5168, "step": 4274 }, { "epoch": 1.61, "learning_rate": 2.207252784821179e-06, "loss": 0.4957, "step": 4275 }, { "epoch": 1.61, "learning_rate": 2.203129643850168e-06, "loss": 0.5694, "step": 4276 }, { "epoch": 1.61, "learning_rate": 2.199009880704377e-06, "loss": 0.5108, "step": 4277 }, { "epoch": 1.61, "learning_rate": 2.194893497168603e-06, "loss": 0.5859, "step": 4278 }, { "epoch": 1.61, "learning_rate": 2.1907804950261847e-06, "loss": 0.5649, "step": 4279 }, { "epoch": 1.61, "learning_rate": 2.186670876058985e-06, "loss": 0.5232, "step": 4280 }, { "epoch": 1.61, "learning_rate": 2.1825646420474067e-06, "loss": 0.5314, "step": 4281 }, { "epoch": 1.61, "learning_rate": 2.1784617947703922e-06, "loss": 0.588, "step": 4282 }, { "epoch": 1.62, "learning_rate": 2.17436233600541e-06, "loss": 0.5191, "step": 4283 }, { "epoch": 1.62, "learning_rate": 2.1702662675284604e-06, "loss": 0.4829, "step": 4284 }, { "epoch": 1.62, "learning_rate": 2.166173591114077e-06, "loss": 0.547, "step": 4285 }, { "epoch": 1.62, "learning_rate": 2.162084308535325e-06, "loss": 0.5764, "step": 4286 }, { "epoch": 1.62, "learning_rate": 2.1579984215637928e-06, "loss": 0.5131, "step": 4287 }, { "epoch": 1.62, "learning_rate": 2.1539159319696025e-06, "loss": 0.511, "step": 4288 }, { "epoch": 1.62, "learning_rate": 2.14983684152141e-06, "loss": 0.5319, "step": 4289 }, { "epoch": 1.62, "learning_rate": 2.14576115198639e-06, "loss": 0.5764, "step": 4290 }, { "epoch": 1.62, "learning_rate": 2.141688865130246e-06, "loss": 0.5665, "step": 4291 }, { "epoch": 1.62, "learning_rate": 2.1376199827172086e-06, "loss": 0.5272, "step": 4292 }, { "epoch": 1.62, "learning_rate": 2.1335545065100326e-06, "loss": 0.534, "step": 4293 }, { "epoch": 1.62, "learning_rate": 2.1294924382699977e-06, "loss": 0.5697, "step": 4294 }, { "epoch": 1.62, "learning_rate": 2.125433779756908e-06, "loss": 0.4887, "step": 4295 }, { "epoch": 1.62, "learning_rate": 2.1213785327290893e-06, "loss": 0.5337, "step": 4296 }, { "epoch": 1.62, "learning_rate": 2.1173266989433873e-06, "loss": 0.5203, "step": 4297 }, { "epoch": 1.62, "learning_rate": 2.113278280155173e-06, "loss": 0.5362, "step": 4298 }, { "epoch": 1.62, "learning_rate": 2.1092332781183367e-06, "loss": 0.5579, "step": 4299 }, { "epoch": 1.62, "learning_rate": 2.105191694585287e-06, "loss": 0.5736, "step": 4300 }, { "epoch": 1.62, "learning_rate": 2.101153531306952e-06, "loss": 0.554, "step": 4301 }, { "epoch": 1.62, "learning_rate": 2.0971187900327793e-06, "loss": 0.5062, "step": 4302 }, { "epoch": 1.62, "learning_rate": 2.093087472510733e-06, "loss": 0.539, "step": 4303 }, { "epoch": 1.62, "learning_rate": 2.089059580487295e-06, "loss": 0.5562, "step": 4304 }, { "epoch": 1.62, "learning_rate": 2.08503511570746e-06, "loss": 0.5541, "step": 4305 }, { "epoch": 1.62, "learning_rate": 2.0810140799147393e-06, "loss": 0.4956, "step": 4306 }, { "epoch": 1.62, "learning_rate": 2.076996474851167e-06, "loss": 0.5654, "step": 4307 }, { "epoch": 1.62, "learning_rate": 2.072982302257276e-06, "loss": 0.4768, "step": 4308 }, { "epoch": 1.62, "learning_rate": 2.068971563872122e-06, "loss": 0.5336, "step": 4309 }, { "epoch": 1.63, "learning_rate": 2.06496426143327e-06, "loss": 0.528, "step": 4310 }, { "epoch": 1.63, "learning_rate": 2.0609603966767977e-06, "loss": 0.5305, "step": 4311 }, { "epoch": 1.63, "learning_rate": 2.0569599713372933e-06, "loss": 0.4981, "step": 4312 }, { "epoch": 1.63, "learning_rate": 2.0529629871478498e-06, "loss": 0.5576, "step": 4313 }, { "epoch": 1.63, "learning_rate": 2.0489694458400845e-06, "loss": 0.5402, "step": 4314 }, { "epoch": 1.63, "learning_rate": 2.0449793491441026e-06, "loss": 0.5422, "step": 4315 }, { "epoch": 1.63, "learning_rate": 2.040992698788531e-06, "loss": 0.5528, "step": 4316 }, { "epoch": 1.63, "learning_rate": 2.037009496500498e-06, "loss": 0.4874, "step": 4317 }, { "epoch": 1.63, "learning_rate": 2.0330297440056423e-06, "loss": 0.5006, "step": 4318 }, { "epoch": 1.63, "learning_rate": 2.0290534430281016e-06, "loss": 0.558, "step": 4319 }, { "epoch": 1.63, "learning_rate": 2.0250805952905263e-06, "loss": 0.5857, "step": 4320 }, { "epoch": 1.63, "learning_rate": 2.0211112025140658e-06, "loss": 0.5004, "step": 4321 }, { "epoch": 1.63, "learning_rate": 2.0171452664183756e-06, "loss": 0.5413, "step": 4322 }, { "epoch": 1.63, "learning_rate": 2.013182788721606e-06, "loss": 0.5296, "step": 4323 }, { "epoch": 1.63, "learning_rate": 2.0092237711404174e-06, "loss": 0.5541, "step": 4324 }, { "epoch": 1.63, "learning_rate": 2.005268215389965e-06, "loss": 0.5471, "step": 4325 }, { "epoch": 1.63, "learning_rate": 2.001316123183915e-06, "loss": 0.5727, "step": 4326 }, { "epoch": 1.63, "learning_rate": 1.9973674962344214e-06, "loss": 0.5291, "step": 4327 }, { "epoch": 1.63, "learning_rate": 1.993422336252142e-06, "loss": 0.4846, "step": 4328 }, { "epoch": 1.63, "learning_rate": 1.9894806449462335e-06, "loss": 0.5502, "step": 4329 }, { "epoch": 1.63, "learning_rate": 1.985542424024345e-06, "loss": 0.5217, "step": 4330 }, { "epoch": 1.63, "learning_rate": 1.981607675192626e-06, "loss": 0.5651, "step": 4331 }, { "epoch": 1.63, "learning_rate": 1.9776764001557203e-06, "loss": 0.5599, "step": 4332 }, { "epoch": 1.63, "learning_rate": 1.9737486006167717e-06, "loss": 0.6077, "step": 4333 }, { "epoch": 1.63, "learning_rate": 1.9698242782774126e-06, "loss": 0.5835, "step": 4334 }, { "epoch": 1.63, "learning_rate": 1.9659034348377703e-06, "loss": 0.5336, "step": 4335 }, { "epoch": 1.63, "learning_rate": 1.9619860719964644e-06, "loss": 0.5272, "step": 4336 }, { "epoch": 1.64, "learning_rate": 1.9580721914506105e-06, "loss": 0.5289, "step": 4337 }, { "epoch": 1.64, "learning_rate": 1.9541617948958068e-06, "loss": 0.5239, "step": 4338 }, { "epoch": 1.64, "learning_rate": 1.9502548840261526e-06, "loss": 0.5182, "step": 4339 }, { "epoch": 1.64, "learning_rate": 1.946351460534233e-06, "loss": 0.5324, "step": 4340 }, { "epoch": 1.64, "learning_rate": 1.9424515261111178e-06, "loss": 0.5474, "step": 4341 }, { "epoch": 1.64, "learning_rate": 1.9385550824463727e-06, "loss": 0.5839, "step": 4342 }, { "epoch": 1.64, "learning_rate": 1.9346621312280457e-06, "loss": 0.5682, "step": 4343 }, { "epoch": 1.64, "learning_rate": 1.9307726741426746e-06, "loss": 0.5511, "step": 4344 }, { "epoch": 1.64, "learning_rate": 1.926886712875281e-06, "loss": 0.5594, "step": 4345 }, { "epoch": 1.64, "learning_rate": 1.923004249109375e-06, "loss": 0.5618, "step": 4346 }, { "epoch": 1.64, "learning_rate": 1.919125284526949e-06, "loss": 0.4986, "step": 4347 }, { "epoch": 1.64, "learning_rate": 1.91524982080848e-06, "loss": 0.5011, "step": 4348 }, { "epoch": 1.64, "learning_rate": 1.9113778596329313e-06, "loss": 0.5249, "step": 4349 }, { "epoch": 1.64, "learning_rate": 1.9075094026777428e-06, "loss": 0.5563, "step": 4350 }, { "epoch": 1.64, "learning_rate": 1.9036444516188413e-06, "loss": 0.5069, "step": 4351 }, { "epoch": 1.64, "learning_rate": 1.8997830081306334e-06, "loss": 0.5658, "step": 4352 }, { "epoch": 1.64, "learning_rate": 1.8959250738860057e-06, "loss": 0.5074, "step": 4353 }, { "epoch": 1.64, "learning_rate": 1.8920706505563246e-06, "loss": 0.5365, "step": 4354 }, { "epoch": 1.64, "learning_rate": 1.8882197398114354e-06, "loss": 0.5169, "step": 4355 }, { "epoch": 1.64, "learning_rate": 1.8843723433196614e-06, "loss": 0.5676, "step": 4356 }, { "epoch": 1.64, "learning_rate": 1.8805284627478049e-06, "loss": 0.532, "step": 4357 }, { "epoch": 1.64, "learning_rate": 1.8766880997611424e-06, "loss": 0.5158, "step": 4358 }, { "epoch": 1.64, "learning_rate": 1.872851256023429e-06, "loss": 0.5703, "step": 4359 }, { "epoch": 1.64, "learning_rate": 1.8690179331968938e-06, "loss": 0.5684, "step": 4360 }, { "epoch": 1.64, "learning_rate": 1.8651881329422417e-06, "loss": 0.5441, "step": 4361 }, { "epoch": 1.64, "learning_rate": 1.8613618569186487e-06, "loss": 0.4929, "step": 4362 }, { "epoch": 1.65, "learning_rate": 1.8575391067837655e-06, "loss": 0.5317, "step": 4363 }, { "epoch": 1.65, "learning_rate": 1.853719884193721e-06, "loss": 0.5671, "step": 4364 }, { "epoch": 1.65, "learning_rate": 1.849904190803109e-06, "loss": 0.6088, "step": 4365 }, { "epoch": 1.65, "learning_rate": 1.846092028264993e-06, "loss": 0.5426, "step": 4366 }, { "epoch": 1.65, "learning_rate": 1.8422833982309108e-06, "loss": 0.5333, "step": 4367 }, { "epoch": 1.65, "learning_rate": 1.8384783023508679e-06, "loss": 0.5053, "step": 4368 }, { "epoch": 1.65, "learning_rate": 1.8346767422733402e-06, "loss": 0.4846, "step": 4369 }, { "epoch": 1.65, "learning_rate": 1.8308787196452738e-06, "loss": 0.5014, "step": 4370 }, { "epoch": 1.65, "learning_rate": 1.8270842361120788e-06, "loss": 0.5677, "step": 4371 }, { "epoch": 1.65, "learning_rate": 1.8232932933176339e-06, "loss": 0.5005, "step": 4372 }, { "epoch": 1.65, "learning_rate": 1.8195058929042798e-06, "loss": 0.5619, "step": 4373 }, { "epoch": 1.65, "learning_rate": 1.815722036512827e-06, "loss": 0.5861, "step": 4374 }, { "epoch": 1.65, "learning_rate": 1.811941725782549e-06, "loss": 0.5683, "step": 4375 }, { "epoch": 1.65, "learning_rate": 1.808164962351182e-06, "loss": 0.5751, "step": 4376 }, { "epoch": 1.65, "learning_rate": 1.8043917478549322e-06, "loss": 0.5172, "step": 4377 }, { "epoch": 1.65, "learning_rate": 1.8006220839284594e-06, "loss": 0.5431, "step": 4378 }, { "epoch": 1.65, "learning_rate": 1.7968559722048906e-06, "loss": 0.5428, "step": 4379 }, { "epoch": 1.65, "learning_rate": 1.7930934143158119e-06, "loss": 0.5088, "step": 4380 }, { "epoch": 1.65, "learning_rate": 1.789334411891267e-06, "loss": 0.5462, "step": 4381 }, { "epoch": 1.65, "learning_rate": 1.7855789665597622e-06, "loss": 0.5509, "step": 4382 }, { "epoch": 1.65, "learning_rate": 1.7818270799482683e-06, "loss": 0.5814, "step": 4383 }, { "epoch": 1.65, "learning_rate": 1.7780787536822042e-06, "loss": 0.5885, "step": 4384 }, { "epoch": 1.65, "learning_rate": 1.7743339893854538e-06, "loss": 0.5094, "step": 4385 }, { "epoch": 1.65, "learning_rate": 1.770592788680353e-06, "loss": 0.5227, "step": 4386 }, { "epoch": 1.65, "learning_rate": 1.7668551531876998e-06, "loss": 0.5823, "step": 4387 }, { "epoch": 1.65, "learning_rate": 1.7631210845267354e-06, "loss": 0.5242, "step": 4388 }, { "epoch": 1.65, "learning_rate": 1.7593905843151727e-06, "loss": 0.5302, "step": 4389 }, { "epoch": 1.66, "learning_rate": 1.7556636541691663e-06, "loss": 0.5608, "step": 4390 }, { "epoch": 1.66, "learning_rate": 1.7519402957033294e-06, "loss": 0.5801, "step": 4391 }, { "epoch": 1.66, "learning_rate": 1.748220510530726e-06, "loss": 0.537, "step": 4392 }, { "epoch": 1.66, "learning_rate": 1.7445043002628715e-06, "loss": 0.5868, "step": 4393 }, { "epoch": 1.66, "learning_rate": 1.7407916665097347e-06, "loss": 0.5185, "step": 4394 }, { "epoch": 1.66, "learning_rate": 1.7370826108797334e-06, "loss": 0.4944, "step": 4395 }, { "epoch": 1.66, "learning_rate": 1.7333771349797358e-06, "loss": 0.5318, "step": 4396 }, { "epoch": 1.66, "learning_rate": 1.7296752404150575e-06, "loss": 0.5692, "step": 4397 }, { "epoch": 1.66, "learning_rate": 1.7259769287894667e-06, "loss": 0.5057, "step": 4398 }, { "epoch": 1.66, "learning_rate": 1.7222822017051754e-06, "loss": 0.4523, "step": 4399 }, { "epoch": 1.66, "learning_rate": 1.7185910607628441e-06, "loss": 0.5464, "step": 4400 }, { "epoch": 1.66, "learning_rate": 1.7149035075615795e-06, "loss": 0.5141, "step": 4401 }, { "epoch": 1.66, "learning_rate": 1.7112195436989342e-06, "loss": 0.5711, "step": 4402 }, { "epoch": 1.66, "learning_rate": 1.7075391707709077e-06, "loss": 0.5771, "step": 4403 }, { "epoch": 1.66, "learning_rate": 1.7038623903719388e-06, "loss": 0.4804, "step": 4404 }, { "epoch": 1.66, "learning_rate": 1.7001892040949141e-06, "loss": 0.5463, "step": 4405 }, { "epoch": 1.66, "learning_rate": 1.6965196135311634e-06, "loss": 0.4836, "step": 4406 }, { "epoch": 1.66, "learning_rate": 1.6928536202704538e-06, "loss": 0.5283, "step": 4407 }, { "epoch": 1.66, "learning_rate": 1.6891912259010034e-06, "loss": 0.5624, "step": 4408 }, { "epoch": 1.66, "learning_rate": 1.6855324320094602e-06, "loss": 0.4782, "step": 4409 }, { "epoch": 1.66, "learning_rate": 1.6818772401809192e-06, "loss": 0.552, "step": 4410 }, { "epoch": 1.66, "learning_rate": 1.6782256519989126e-06, "loss": 0.5325, "step": 4411 }, { "epoch": 1.66, "learning_rate": 1.674577669045413e-06, "loss": 0.533, "step": 4412 }, { "epoch": 1.66, "learning_rate": 1.670933292900826e-06, "loss": 0.5229, "step": 4413 }, { "epoch": 1.66, "learning_rate": 1.6672925251440043e-06, "loss": 0.5385, "step": 4414 }, { "epoch": 1.66, "learning_rate": 1.6636553673522304e-06, "loss": 0.499, "step": 4415 }, { "epoch": 1.67, "learning_rate": 1.660021821101222e-06, "loss": 0.5343, "step": 4416 }, { "epoch": 1.67, "learning_rate": 1.6563918879651352e-06, "loss": 0.4802, "step": 4417 }, { "epoch": 1.67, "learning_rate": 1.652765569516559e-06, "loss": 0.5304, "step": 4418 }, { "epoch": 1.67, "learning_rate": 1.6491428673265197e-06, "loss": 0.5419, "step": 4419 }, { "epoch": 1.67, "learning_rate": 1.6455237829644699e-06, "loss": 0.5395, "step": 4420 }, { "epoch": 1.67, "learning_rate": 1.6419083179983053e-06, "loss": 0.5665, "step": 4421 }, { "epoch": 1.67, "learning_rate": 1.6382964739943452e-06, "loss": 0.507, "step": 4422 }, { "epoch": 1.67, "learning_rate": 1.6346882525173446e-06, "loss": 0.5478, "step": 4423 }, { "epoch": 1.67, "learning_rate": 1.6310836551304842e-06, "loss": 0.5515, "step": 4424 }, { "epoch": 1.67, "learning_rate": 1.6274826833953783e-06, "loss": 0.5407, "step": 4425 }, { "epoch": 1.67, "learning_rate": 1.6238853388720687e-06, "loss": 0.5628, "step": 4426 }, { "epoch": 1.67, "learning_rate": 1.6202916231190314e-06, "loss": 0.5153, "step": 4427 }, { "epoch": 1.67, "learning_rate": 1.6167015376931638e-06, "loss": 0.5004, "step": 4428 }, { "epoch": 1.67, "learning_rate": 1.6131150841497922e-06, "loss": 0.5264, "step": 4429 }, { "epoch": 1.67, "learning_rate": 1.6095322640426724e-06, "loss": 0.4814, "step": 4430 }, { "epoch": 1.67, "learning_rate": 1.6059530789239797e-06, "loss": 0.5527, "step": 4431 }, { "epoch": 1.67, "learning_rate": 1.6023775303443167e-06, "loss": 0.5019, "step": 4432 }, { "epoch": 1.67, "learning_rate": 1.598805619852719e-06, "loss": 0.5483, "step": 4433 }, { "epoch": 1.67, "learning_rate": 1.5952373489966367e-06, "loss": 0.4809, "step": 4434 }, { "epoch": 1.67, "learning_rate": 1.591672719321945e-06, "loss": 0.5577, "step": 4435 }, { "epoch": 1.67, "learning_rate": 1.5881117323729434e-06, "loss": 0.5469, "step": 4436 }, { "epoch": 1.67, "learning_rate": 1.5845543896923532e-06, "loss": 0.4992, "step": 4437 }, { "epoch": 1.67, "learning_rate": 1.581000692821314e-06, "loss": 0.5885, "step": 4438 }, { "epoch": 1.67, "learning_rate": 1.5774506432993852e-06, "loss": 0.5202, "step": 4439 }, { "epoch": 1.67, "learning_rate": 1.5739042426645556e-06, "loss": 0.542, "step": 4440 }, { "epoch": 1.67, "learning_rate": 1.5703614924532229e-06, "loss": 0.5474, "step": 4441 }, { "epoch": 1.67, "learning_rate": 1.5668223942002082e-06, "loss": 0.5677, "step": 4442 }, { "epoch": 1.68, "learning_rate": 1.563286949438747e-06, "loss": 0.5692, "step": 4443 }, { "epoch": 1.68, "learning_rate": 1.5597551597004968e-06, "loss": 0.5837, "step": 4444 }, { "epoch": 1.68, "learning_rate": 1.5562270265155278e-06, "loss": 0.5103, "step": 4445 }, { "epoch": 1.68, "learning_rate": 1.5527025514123261e-06, "loss": 0.5113, "step": 4446 }, { "epoch": 1.68, "learning_rate": 1.5491817359177962e-06, "loss": 0.5145, "step": 4447 }, { "epoch": 1.68, "learning_rate": 1.5456645815572545e-06, "loss": 0.5396, "step": 4448 }, { "epoch": 1.68, "learning_rate": 1.542151089854431e-06, "loss": 0.5328, "step": 4449 }, { "epoch": 1.68, "learning_rate": 1.5386412623314717e-06, "loss": 0.4726, "step": 4450 }, { "epoch": 1.68, "learning_rate": 1.535135100508932e-06, "loss": 0.5591, "step": 4451 }, { "epoch": 1.68, "learning_rate": 1.5316326059057807e-06, "loss": 0.4995, "step": 4452 }, { "epoch": 1.68, "learning_rate": 1.528133780039397e-06, "loss": 0.525, "step": 4453 }, { "epoch": 1.68, "learning_rate": 1.524638624425574e-06, "loss": 0.5326, "step": 4454 }, { "epoch": 1.68, "learning_rate": 1.5211471405785094e-06, "loss": 0.5232, "step": 4455 }, { "epoch": 1.68, "learning_rate": 1.5176593300108144e-06, "loss": 0.5157, "step": 4456 }, { "epoch": 1.68, "learning_rate": 1.514175194233507e-06, "loss": 0.5421, "step": 4457 }, { "epoch": 1.68, "learning_rate": 1.5106947347560141e-06, "loss": 0.5342, "step": 4458 }, { "epoch": 1.68, "learning_rate": 1.5072179530861686e-06, "loss": 0.5689, "step": 4459 }, { "epoch": 1.68, "learning_rate": 1.503744850730211e-06, "loss": 0.5459, "step": 4460 }, { "epoch": 1.68, "learning_rate": 1.5002754291927878e-06, "loss": 0.5114, "step": 4461 }, { "epoch": 1.68, "learning_rate": 1.4968096899769514e-06, "loss": 0.5645, "step": 4462 }, { "epoch": 1.68, "learning_rate": 1.4933476345841569e-06, "loss": 0.5977, "step": 4463 }, { "epoch": 1.68, "learning_rate": 1.4898892645142637e-06, "loss": 0.5177, "step": 4464 }, { "epoch": 1.68, "learning_rate": 1.4864345812655423e-06, "loss": 0.5583, "step": 4465 }, { "epoch": 1.68, "learning_rate": 1.482983586334652e-06, "loss": 0.6014, "step": 4466 }, { "epoch": 1.68, "learning_rate": 1.4795362812166647e-06, "loss": 0.5725, "step": 4467 }, { "epoch": 1.68, "learning_rate": 1.4760926674050502e-06, "loss": 0.4963, "step": 4468 }, { "epoch": 1.69, "learning_rate": 1.4726527463916795e-06, "loss": 0.579, "step": 4469 }, { "epoch": 1.69, "learning_rate": 1.4692165196668218e-06, "loss": 0.5261, "step": 4470 }, { "epoch": 1.69, "learning_rate": 1.4657839887191527e-06, "loss": 0.5213, "step": 4471 }, { "epoch": 1.69, "learning_rate": 1.4623551550357395e-06, "loss": 0.5751, "step": 4472 }, { "epoch": 1.69, "learning_rate": 1.4589300201020518e-06, "loss": 0.5739, "step": 4473 }, { "epoch": 1.69, "learning_rate": 1.4555085854019512e-06, "loss": 0.5589, "step": 4474 }, { "epoch": 1.69, "learning_rate": 1.4520908524177036e-06, "loss": 0.5257, "step": 4475 }, { "epoch": 1.69, "learning_rate": 1.448676822629963e-06, "loss": 0.5065, "step": 4476 }, { "epoch": 1.69, "learning_rate": 1.4452664975177898e-06, "loss": 0.525, "step": 4477 }, { "epoch": 1.69, "learning_rate": 1.4418598785586312e-06, "loss": 0.5394, "step": 4478 }, { "epoch": 1.69, "learning_rate": 1.438456967228331e-06, "loss": 0.5583, "step": 4479 }, { "epoch": 1.69, "learning_rate": 1.435057765001129e-06, "loss": 0.5318, "step": 4480 }, { "epoch": 1.69, "learning_rate": 1.431662273349651e-06, "loss": 0.5641, "step": 4481 }, { "epoch": 1.69, "learning_rate": 1.4282704937449243e-06, "loss": 0.5435, "step": 4482 }, { "epoch": 1.69, "learning_rate": 1.4248824276563588e-06, "loss": 0.5348, "step": 4483 }, { "epoch": 1.69, "learning_rate": 1.4214980765517684e-06, "loss": 0.5953, "step": 4484 }, { "epoch": 1.69, "learning_rate": 1.4181174418973453e-06, "loss": 0.5036, "step": 4485 }, { "epoch": 1.69, "learning_rate": 1.4147405251576774e-06, "loss": 0.5573, "step": 4486 }, { "epoch": 1.69, "learning_rate": 1.4113673277957395e-06, "loss": 0.5808, "step": 4487 }, { "epoch": 1.69, "learning_rate": 1.4079978512728999e-06, "loss": 0.5042, "step": 4488 }, { "epoch": 1.69, "learning_rate": 1.4046320970489046e-06, "loss": 0.5247, "step": 4489 }, { "epoch": 1.69, "learning_rate": 1.401270066581899e-06, "loss": 0.5849, "step": 4490 }, { "epoch": 1.69, "learning_rate": 1.39791176132841e-06, "loss": 0.5843, "step": 4491 }, { "epoch": 1.69, "learning_rate": 1.394557182743349e-06, "loss": 0.5616, "step": 4492 }, { "epoch": 1.69, "learning_rate": 1.391206332280014e-06, "loss": 0.5937, "step": 4493 }, { "epoch": 1.69, "learning_rate": 1.3878592113900903e-06, "loss": 0.55, "step": 4494 }, { "epoch": 1.69, "learning_rate": 1.3845158215236442e-06, "loss": 0.5282, "step": 4495 }, { "epoch": 1.7, "learning_rate": 1.3811761641291265e-06, "loss": 0.5413, "step": 4496 }, { "epoch": 1.7, "learning_rate": 1.377840240653373e-06, "loss": 0.5073, "step": 4497 }, { "epoch": 1.7, "learning_rate": 1.3745080525415987e-06, "loss": 0.523, "step": 4498 }, { "epoch": 1.7, "learning_rate": 1.3711796012374035e-06, "loss": 0.5925, "step": 4499 }, { "epoch": 1.7, "learning_rate": 1.3678548881827659e-06, "loss": 0.5684, "step": 4500 }, { "epoch": 1.7, "learning_rate": 1.3645339148180458e-06, "loss": 0.5034, "step": 4501 }, { "epoch": 1.7, "learning_rate": 1.361216682581984e-06, "loss": 0.5547, "step": 4502 }, { "epoch": 1.7, "learning_rate": 1.357903192911698e-06, "loss": 0.5755, "step": 4503 }, { "epoch": 1.7, "learning_rate": 1.354593447242686e-06, "loss": 0.5447, "step": 4504 }, { "epoch": 1.7, "learning_rate": 1.3512874470088244e-06, "loss": 0.5534, "step": 4505 }, { "epoch": 1.7, "learning_rate": 1.3479851936423648e-06, "loss": 0.5703, "step": 4506 }, { "epoch": 1.7, "learning_rate": 1.3446866885739375e-06, "loss": 0.5979, "step": 4507 }, { "epoch": 1.7, "learning_rate": 1.3413919332325487e-06, "loss": 0.5759, "step": 4508 }, { "epoch": 1.7, "learning_rate": 1.3381009290455804e-06, "loss": 0.5085, "step": 4509 }, { "epoch": 1.7, "learning_rate": 1.3348136774387877e-06, "loss": 0.5183, "step": 4510 }, { "epoch": 1.7, "learning_rate": 1.3315301798363023e-06, "loss": 0.5054, "step": 4511 }, { "epoch": 1.7, "learning_rate": 1.3282504376606263e-06, "loss": 0.495, "step": 4512 }, { "epoch": 1.7, "learning_rate": 1.3249744523326403e-06, "loss": 0.5734, "step": 4513 }, { "epoch": 1.7, "learning_rate": 1.3217022252715883e-06, "loss": 0.5469, "step": 4514 }, { "epoch": 1.7, "learning_rate": 1.318433757895099e-06, "loss": 0.5297, "step": 4515 }, { "epoch": 1.7, "learning_rate": 1.3151690516191629e-06, "loss": 0.5228, "step": 4516 }, { "epoch": 1.7, "learning_rate": 1.311908107858141e-06, "loss": 0.5548, "step": 4517 }, { "epoch": 1.7, "learning_rate": 1.3086509280247672e-06, "loss": 0.514, "step": 4518 }, { "epoch": 1.7, "learning_rate": 1.3053975135301444e-06, "loss": 0.5541, "step": 4519 }, { "epoch": 1.7, "learning_rate": 1.3021478657837438e-06, "loss": 0.5556, "step": 4520 }, { "epoch": 1.7, "learning_rate": 1.2989019861934038e-06, "loss": 0.5461, "step": 4521 }, { "epoch": 1.71, "learning_rate": 1.295659876165335e-06, "loss": 0.5233, "step": 4522 }, { "epoch": 1.71, "learning_rate": 1.2924215371041116e-06, "loss": 0.535, "step": 4523 }, { "epoch": 1.71, "learning_rate": 1.2891869704126692e-06, "loss": 0.5355, "step": 4524 }, { "epoch": 1.71, "learning_rate": 1.2859561774923157e-06, "loss": 0.4867, "step": 4525 }, { "epoch": 1.71, "learning_rate": 1.2827291597427228e-06, "loss": 0.5128, "step": 4526 }, { "epoch": 1.71, "learning_rate": 1.279505918561923e-06, "loss": 0.5638, "step": 4527 }, { "epoch": 1.71, "learning_rate": 1.2762864553463218e-06, "loss": 0.5226, "step": 4528 }, { "epoch": 1.71, "learning_rate": 1.2730707714906787e-06, "loss": 0.4921, "step": 4529 }, { "epoch": 1.71, "learning_rate": 1.2698588683881185e-06, "loss": 0.5243, "step": 4530 }, { "epoch": 1.71, "learning_rate": 1.2666507474301314e-06, "loss": 0.4922, "step": 4531 }, { "epoch": 1.71, "learning_rate": 1.2634464100065614e-06, "loss": 0.5024, "step": 4532 }, { "epoch": 1.71, "learning_rate": 1.2602458575056198e-06, "loss": 0.5598, "step": 4533 }, { "epoch": 1.71, "learning_rate": 1.25704909131388e-06, "loss": 0.5636, "step": 4534 }, { "epoch": 1.71, "learning_rate": 1.2538561128162685e-06, "loss": 0.564, "step": 4535 }, { "epoch": 1.71, "learning_rate": 1.2506669233960755e-06, "loss": 0.544, "step": 4536 }, { "epoch": 1.71, "learning_rate": 1.2474815244349491e-06, "loss": 0.5901, "step": 4537 }, { "epoch": 1.71, "learning_rate": 1.2442999173128945e-06, "loss": 0.5324, "step": 4538 }, { "epoch": 1.71, "learning_rate": 1.2411221034082687e-06, "loss": 0.5061, "step": 4539 }, { "epoch": 1.71, "learning_rate": 1.2379480840977965e-06, "loss": 0.5087, "step": 4540 }, { "epoch": 1.71, "learning_rate": 1.234777860756552e-06, "loss": 0.5365, "step": 4541 }, { "epoch": 1.71, "learning_rate": 1.2316114347579644e-06, "loss": 0.5581, "step": 4542 }, { "epoch": 1.71, "learning_rate": 1.228448807473821e-06, "loss": 0.5726, "step": 4543 }, { "epoch": 1.71, "learning_rate": 1.2252899802742602e-06, "loss": 0.5393, "step": 4544 }, { "epoch": 1.71, "learning_rate": 1.2221349545277749e-06, "loss": 0.4966, "step": 4545 }, { "epoch": 1.71, "learning_rate": 1.2189837316012132e-06, "loss": 0.5557, "step": 4546 }, { "epoch": 1.71, "learning_rate": 1.2158363128597739e-06, "loss": 0.5298, "step": 4547 }, { "epoch": 1.71, "learning_rate": 1.2126926996670075e-06, "loss": 0.5275, "step": 4548 }, { "epoch": 1.72, "learning_rate": 1.2095528933848165e-06, "loss": 0.5252, "step": 4549 }, { "epoch": 1.72, "learning_rate": 1.2064168953734534e-06, "loss": 0.4816, "step": 4550 }, { "epoch": 1.72, "learning_rate": 1.2032847069915232e-06, "loss": 0.5346, "step": 4551 }, { "epoch": 1.72, "learning_rate": 1.2001563295959762e-06, "loss": 0.5299, "step": 4552 }, { "epoch": 1.72, "learning_rate": 1.197031764542117e-06, "loss": 0.5205, "step": 4553 }, { "epoch": 1.72, "learning_rate": 1.1939110131835929e-06, "loss": 0.581, "step": 4554 }, { "epoch": 1.72, "learning_rate": 1.1907940768724035e-06, "loss": 0.5311, "step": 4555 }, { "epoch": 1.72, "learning_rate": 1.1876809569588932e-06, "loss": 0.5313, "step": 4556 }, { "epoch": 1.72, "learning_rate": 1.184571654791754e-06, "loss": 0.5299, "step": 4557 }, { "epoch": 1.72, "learning_rate": 1.181466171718023e-06, "loss": 0.5896, "step": 4558 }, { "epoch": 1.72, "learning_rate": 1.1783645090830875e-06, "loss": 0.5177, "step": 4559 }, { "epoch": 1.72, "learning_rate": 1.1752666682306702e-06, "loss": 0.5672, "step": 4560 }, { "epoch": 1.72, "learning_rate": 1.1721726505028463e-06, "loss": 0.5348, "step": 4561 }, { "epoch": 1.72, "learning_rate": 1.1690824572400317e-06, "loss": 0.5473, "step": 4562 }, { "epoch": 1.72, "learning_rate": 1.1659960897809852e-06, "loss": 0.5714, "step": 4563 }, { "epoch": 1.72, "learning_rate": 1.1629135494628097e-06, "loss": 0.5222, "step": 4564 }, { "epoch": 1.72, "learning_rate": 1.1598348376209456e-06, "loss": 0.5308, "step": 4565 }, { "epoch": 1.72, "learning_rate": 1.156759955589185e-06, "loss": 0.5589, "step": 4566 }, { "epoch": 1.72, "learning_rate": 1.153688904699648e-06, "loss": 0.5535, "step": 4567 }, { "epoch": 1.72, "learning_rate": 1.1506216862828024e-06, "loss": 0.523, "step": 4568 }, { "epoch": 1.72, "learning_rate": 1.1475583016674553e-06, "loss": 0.4756, "step": 4569 }, { "epoch": 1.72, "learning_rate": 1.1444987521807494e-06, "loss": 0.5358, "step": 4570 }, { "epoch": 1.72, "learning_rate": 1.141443039148169e-06, "loss": 0.6238, "step": 4571 }, { "epoch": 1.72, "learning_rate": 1.1383911638935374e-06, "loss": 0.5594, "step": 4572 }, { "epoch": 1.72, "learning_rate": 1.1353431277390125e-06, "loss": 0.5456, "step": 4573 }, { "epoch": 1.72, "learning_rate": 1.1322989320050925e-06, "loss": 0.4932, "step": 4574 }, { "epoch": 1.73, "learning_rate": 1.1292585780106047e-06, "loss": 0.5164, "step": 4575 }, { "epoch": 1.73, "learning_rate": 1.1262220670727176e-06, "loss": 0.576, "step": 4576 }, { "epoch": 1.73, "learning_rate": 1.1231894005069333e-06, "loss": 0.4996, "step": 4577 }, { "epoch": 1.73, "learning_rate": 1.120160579627092e-06, "loss": 0.5446, "step": 4578 }, { "epoch": 1.73, "learning_rate": 1.1171356057453642e-06, "loss": 0.574, "step": 4579 }, { "epoch": 1.73, "learning_rate": 1.1141144801722514e-06, "loss": 0.57, "step": 4580 }, { "epoch": 1.73, "learning_rate": 1.1110972042165957e-06, "loss": 0.5044, "step": 4581 }, { "epoch": 1.73, "learning_rate": 1.1080837791855614e-06, "loss": 0.5531, "step": 4582 }, { "epoch": 1.73, "learning_rate": 1.1050742063846521e-06, "loss": 0.5356, "step": 4583 }, { "epoch": 1.73, "learning_rate": 1.102068487117698e-06, "loss": 0.5882, "step": 4584 }, { "epoch": 1.73, "learning_rate": 1.0990666226868641e-06, "loss": 0.5349, "step": 4585 }, { "epoch": 1.73, "learning_rate": 1.0960686143926437e-06, "loss": 0.5557, "step": 4586 }, { "epoch": 1.73, "learning_rate": 1.0930744635338575e-06, "loss": 0.5816, "step": 4587 }, { "epoch": 1.73, "learning_rate": 1.090084171407656e-06, "loss": 0.4963, "step": 4588 }, { "epoch": 1.73, "learning_rate": 1.0870977393095229e-06, "loss": 0.519, "step": 4589 }, { "epoch": 1.73, "learning_rate": 1.0841151685332563e-06, "loss": 0.5515, "step": 4590 }, { "epoch": 1.73, "learning_rate": 1.0811364603709983e-06, "loss": 0.5162, "step": 4591 }, { "epoch": 1.73, "learning_rate": 1.078161616113208e-06, "loss": 0.5854, "step": 4592 }, { "epoch": 1.73, "learning_rate": 1.0751906370486708e-06, "loss": 0.5218, "step": 4593 }, { "epoch": 1.73, "learning_rate": 1.0722235244644996e-06, "loss": 0.5567, "step": 4594 }, { "epoch": 1.73, "learning_rate": 1.0692602796461316e-06, "loss": 0.5362, "step": 4595 }, { "epoch": 1.73, "learning_rate": 1.0663009038773286e-06, "loss": 0.4979, "step": 4596 }, { "epoch": 1.73, "learning_rate": 1.0633453984401753e-06, "loss": 0.5092, "step": 4597 }, { "epoch": 1.73, "learning_rate": 1.060393764615083e-06, "loss": 0.4924, "step": 4598 }, { "epoch": 1.73, "learning_rate": 1.0574460036807798e-06, "loss": 0.4789, "step": 4599 }, { "epoch": 1.73, "learning_rate": 1.0545021169143211e-06, "loss": 0.5156, "step": 4600 }, { "epoch": 1.73, "learning_rate": 1.051562105591082e-06, "loss": 0.5534, "step": 4601 }, { "epoch": 1.74, "learning_rate": 1.048625970984758e-06, "loss": 0.5571, "step": 4602 }, { "epoch": 1.74, "learning_rate": 1.0456937143673674e-06, "loss": 0.5098, "step": 4603 }, { "epoch": 1.74, "learning_rate": 1.042765337009245e-06, "loss": 0.5531, "step": 4604 }, { "epoch": 1.74, "learning_rate": 1.0398408401790472e-06, "loss": 0.5252, "step": 4605 }, { "epoch": 1.74, "learning_rate": 1.0369202251437506e-06, "loss": 0.5222, "step": 4606 }, { "epoch": 1.74, "learning_rate": 1.034003493168646e-06, "loss": 0.577, "step": 4607 }, { "epoch": 1.74, "learning_rate": 1.0310906455173453e-06, "loss": 0.539, "step": 4608 }, { "epoch": 1.74, "learning_rate": 1.0281816834517776e-06, "loss": 0.5299, "step": 4609 }, { "epoch": 1.74, "learning_rate": 1.0252766082321874e-06, "loss": 0.5012, "step": 4610 }, { "epoch": 1.74, "learning_rate": 1.0223754211171343e-06, "loss": 0.5463, "step": 4611 }, { "epoch": 1.74, "learning_rate": 1.0194781233634964e-06, "loss": 0.5761, "step": 4612 }, { "epoch": 1.74, "learning_rate": 1.0165847162264642e-06, "loss": 0.5313, "step": 4613 }, { "epoch": 1.74, "learning_rate": 1.0136952009595446e-06, "loss": 0.5676, "step": 4614 }, { "epoch": 1.74, "learning_rate": 1.0108095788145545e-06, "loss": 0.5164, "step": 4615 }, { "epoch": 1.74, "learning_rate": 1.0079278510416313e-06, "loss": 0.5009, "step": 4616 }, { "epoch": 1.74, "learning_rate": 1.005050018889222e-06, "loss": 0.5196, "step": 4617 }, { "epoch": 1.74, "learning_rate": 1.0021760836040794e-06, "loss": 0.5586, "step": 4618 }, { "epoch": 1.74, "learning_rate": 9.993060464312776e-07, "loss": 0.5166, "step": 4619 }, { "epoch": 1.74, "learning_rate": 9.964399086141974e-07, "loss": 0.5072, "step": 4620 }, { "epoch": 1.74, "learning_rate": 9.935776713945288e-07, "loss": 0.5534, "step": 4621 }, { "epoch": 1.74, "learning_rate": 9.90719336012278e-07, "loss": 0.521, "step": 4622 }, { "epoch": 1.74, "learning_rate": 9.878649037057552e-07, "loss": 0.5377, "step": 4623 }, { "epoch": 1.74, "learning_rate": 9.850143757115827e-07, "loss": 0.5227, "step": 4624 }, { "epoch": 1.74, "learning_rate": 9.821677532646867e-07, "loss": 0.5463, "step": 4625 }, { "epoch": 1.74, "learning_rate": 9.793250375983066e-07, "loss": 0.4766, "step": 4626 }, { "epoch": 1.74, "learning_rate": 9.76486229943987e-07, "loss": 0.4971, "step": 4627 }, { "epoch": 1.75, "learning_rate": 9.736513315315787e-07, "loss": 0.5436, "step": 4628 }, { "epoch": 1.75, "learning_rate": 9.708203435892428e-07, "loss": 0.5277, "step": 4629 }, { "epoch": 1.75, "learning_rate": 9.679932673434421e-07, "loss": 0.5537, "step": 4630 }, { "epoch": 1.75, "learning_rate": 9.651701040189465e-07, "loss": 0.528, "step": 4631 }, { "epoch": 1.75, "learning_rate": 9.623508548388327e-07, "loss": 0.5195, "step": 4632 }, { "epoch": 1.75, "learning_rate": 9.595355210244738e-07, "loss": 0.5609, "step": 4633 }, { "epoch": 1.75, "learning_rate": 9.567241037955543e-07, "loss": 0.5755, "step": 4634 }, { "epoch": 1.75, "learning_rate": 9.53916604370062e-07, "loss": 0.5185, "step": 4635 }, { "epoch": 1.75, "learning_rate": 9.51113023964284e-07, "loss": 0.5932, "step": 4636 }, { "epoch": 1.75, "learning_rate": 9.483133637928099e-07, "loss": 0.5231, "step": 4637 }, { "epoch": 1.75, "learning_rate": 9.455176250685338e-07, "loss": 0.5883, "step": 4638 }, { "epoch": 1.75, "learning_rate": 9.427258090026492e-07, "loss": 0.5512, "step": 4639 }, { "epoch": 1.75, "learning_rate": 9.399379168046452e-07, "loss": 0.5512, "step": 4640 }, { "epoch": 1.75, "learning_rate": 9.371539496823201e-07, "loss": 0.4849, "step": 4641 }, { "epoch": 1.75, "learning_rate": 9.34373908841767e-07, "loss": 0.5358, "step": 4642 }, { "epoch": 1.75, "learning_rate": 9.315977954873778e-07, "loss": 0.5538, "step": 4643 }, { "epoch": 1.75, "learning_rate": 9.28825610821843e-07, "loss": 0.5341, "step": 4644 }, { "epoch": 1.75, "learning_rate": 9.260573560461517e-07, "loss": 0.526, "step": 4645 }, { "epoch": 1.75, "learning_rate": 9.232930323595913e-07, "loss": 0.5177, "step": 4646 }, { "epoch": 1.75, "learning_rate": 9.205326409597437e-07, "loss": 0.5378, "step": 4647 }, { "epoch": 1.75, "learning_rate": 9.177761830424903e-07, "loss": 0.5695, "step": 4648 }, { "epoch": 1.75, "learning_rate": 9.150236598020046e-07, "loss": 0.5092, "step": 4649 }, { "epoch": 1.75, "learning_rate": 9.122750724307605e-07, "loss": 0.5271, "step": 4650 }, { "epoch": 1.75, "learning_rate": 9.095304221195212e-07, "loss": 0.5643, "step": 4651 }, { "epoch": 1.75, "learning_rate": 9.067897100573497e-07, "loss": 0.5282, "step": 4652 }, { "epoch": 1.75, "learning_rate": 9.040529374315976e-07, "loss": 0.5498, "step": 4653 }, { "epoch": 1.75, "learning_rate": 9.013201054279142e-07, "loss": 0.55, "step": 4654 }, { "epoch": 1.76, "learning_rate": 8.985912152302378e-07, "loss": 0.4904, "step": 4655 }, { "epoch": 1.76, "learning_rate": 8.95866268020803e-07, "loss": 0.5382, "step": 4656 }, { "epoch": 1.76, "learning_rate": 8.931452649801342e-07, "loss": 0.5248, "step": 4657 }, { "epoch": 1.76, "learning_rate": 8.904282072870452e-07, "loss": 0.5029, "step": 4658 }, { "epoch": 1.76, "learning_rate": 8.87715096118642e-07, "loss": 0.5075, "step": 4659 }, { "epoch": 1.76, "learning_rate": 8.850059326503258e-07, "loss": 0.5377, "step": 4660 }, { "epoch": 1.76, "learning_rate": 8.823007180557786e-07, "loss": 0.5306, "step": 4661 }, { "epoch": 1.76, "learning_rate": 8.795994535069774e-07, "loss": 0.5375, "step": 4662 }, { "epoch": 1.76, "learning_rate": 8.769021401741861e-07, "loss": 0.5608, "step": 4663 }, { "epoch": 1.76, "learning_rate": 8.742087792259579e-07, "loss": 0.5698, "step": 4664 }, { "epoch": 1.76, "learning_rate": 8.715193718291314e-07, "loss": 0.51, "step": 4665 }, { "epoch": 1.76, "learning_rate": 8.688339191488371e-07, "loss": 0.5231, "step": 4666 }, { "epoch": 1.76, "learning_rate": 8.661524223484896e-07, "loss": 0.5098, "step": 4667 }, { "epoch": 1.76, "learning_rate": 8.634748825897854e-07, "loss": 0.5479, "step": 4668 }, { "epoch": 1.76, "learning_rate": 8.608013010327132e-07, "loss": 0.5365, "step": 4669 }, { "epoch": 1.76, "learning_rate": 8.581316788355432e-07, "loss": 0.5283, "step": 4670 }, { "epoch": 1.76, "learning_rate": 8.554660171548312e-07, "loss": 0.5266, "step": 4671 }, { "epoch": 1.76, "learning_rate": 8.52804317145417e-07, "loss": 0.5202, "step": 4672 }, { "epoch": 1.76, "learning_rate": 8.50146579960428e-07, "loss": 0.547, "step": 4673 }, { "epoch": 1.76, "learning_rate": 8.474928067512678e-07, "loss": 0.5286, "step": 4674 }, { "epoch": 1.76, "learning_rate": 8.448429986676298e-07, "loss": 0.5023, "step": 4675 }, { "epoch": 1.76, "learning_rate": 8.421971568574816e-07, "loss": 0.5633, "step": 4676 }, { "epoch": 1.76, "learning_rate": 8.395552824670783e-07, "loss": 0.5717, "step": 4677 }, { "epoch": 1.76, "learning_rate": 8.369173766409533e-07, "loss": 0.5707, "step": 4678 }, { "epoch": 1.76, "learning_rate": 8.342834405219258e-07, "loss": 0.5131, "step": 4679 }, { "epoch": 1.76, "learning_rate": 8.316534752510896e-07, "loss": 0.547, "step": 4680 }, { "epoch": 1.77, "learning_rate": 8.290274819678212e-07, "loss": 0.5418, "step": 4681 }, { "epoch": 1.77, "learning_rate": 8.264054618097761e-07, "loss": 0.5115, "step": 4682 }, { "epoch": 1.77, "learning_rate": 8.237874159128833e-07, "loss": 0.5327, "step": 4683 }, { "epoch": 1.77, "learning_rate": 8.211733454113568e-07, "loss": 0.5757, "step": 4684 }, { "epoch": 1.77, "learning_rate": 8.185632514376873e-07, "loss": 0.5127, "step": 4685 }, { "epoch": 1.77, "learning_rate": 8.159571351226414e-07, "loss": 0.5679, "step": 4686 }, { "epoch": 1.77, "learning_rate": 8.133549975952614e-07, "loss": 0.5217, "step": 4687 }, { "epoch": 1.77, "learning_rate": 8.10756839982868e-07, "loss": 0.5468, "step": 4688 }, { "epoch": 1.77, "learning_rate": 8.081626634110573e-07, "loss": 0.5333, "step": 4689 }, { "epoch": 1.77, "learning_rate": 8.055724690036981e-07, "loss": 0.5241, "step": 4690 }, { "epoch": 1.77, "learning_rate": 8.029862578829351e-07, "loss": 0.5209, "step": 4691 }, { "epoch": 1.77, "learning_rate": 8.004040311691919e-07, "loss": 0.5458, "step": 4692 }, { "epoch": 1.77, "learning_rate": 7.978257899811604e-07, "loss": 0.4969, "step": 4693 }, { "epoch": 1.77, "learning_rate": 7.95251535435807e-07, "loss": 0.4755, "step": 4694 }, { "epoch": 1.77, "learning_rate": 7.926812686483731e-07, "loss": 0.5655, "step": 4695 }, { "epoch": 1.77, "learning_rate": 7.901149907323713e-07, "loss": 0.5217, "step": 4696 }, { "epoch": 1.77, "learning_rate": 7.875527027995855e-07, "loss": 0.5441, "step": 4697 }, { "epoch": 1.77, "learning_rate": 7.849944059600701e-07, "loss": 0.5391, "step": 4698 }, { "epoch": 1.77, "learning_rate": 7.824401013221538e-07, "loss": 0.5489, "step": 4699 }, { "epoch": 1.77, "learning_rate": 7.798897899924318e-07, "loss": 0.5236, "step": 4700 }, { "epoch": 1.77, "learning_rate": 7.773434730757734e-07, "loss": 0.5531, "step": 4701 }, { "epoch": 1.77, "learning_rate": 7.74801151675314e-07, "loss": 0.5436, "step": 4702 }, { "epoch": 1.77, "learning_rate": 7.722628268924593e-07, "loss": 0.5452, "step": 4703 }, { "epoch": 1.77, "learning_rate": 7.697284998268839e-07, "loss": 0.5956, "step": 4704 }, { "epoch": 1.77, "learning_rate": 7.671981715765286e-07, "loss": 0.5242, "step": 4705 }, { "epoch": 1.77, "learning_rate": 7.646718432376066e-07, "loss": 0.5184, "step": 4706 }, { "epoch": 1.77, "learning_rate": 7.621495159045921e-07, "loss": 0.5406, "step": 4707 }, { "epoch": 1.78, "learning_rate": 7.596311906702292e-07, "loss": 0.5537, "step": 4708 }, { "epoch": 1.78, "learning_rate": 7.571168686255271e-07, "loss": 0.5049, "step": 4709 }, { "epoch": 1.78, "learning_rate": 7.546065508597655e-07, "loss": 0.5258, "step": 4710 }, { "epoch": 1.78, "learning_rate": 7.521002384604814e-07, "loss": 0.5829, "step": 4711 }, { "epoch": 1.78, "learning_rate": 7.495979325134806e-07, "loss": 0.5554, "step": 4712 }, { "epoch": 1.78, "learning_rate": 7.470996341028336e-07, "loss": 0.5356, "step": 4713 }, { "epoch": 1.78, "learning_rate": 7.446053443108736e-07, "loss": 0.5276, "step": 4714 }, { "epoch": 1.78, "learning_rate": 7.421150642181984e-07, "loss": 0.5589, "step": 4715 }, { "epoch": 1.78, "learning_rate": 7.396287949036662e-07, "loss": 0.5522, "step": 4716 }, { "epoch": 1.78, "learning_rate": 7.371465374444031e-07, "loss": 0.5292, "step": 4717 }, { "epoch": 1.78, "learning_rate": 7.3466829291579e-07, "loss": 0.4964, "step": 4718 }, { "epoch": 1.78, "learning_rate": 7.321940623914737e-07, "loss": 0.5046, "step": 4719 }, { "epoch": 1.78, "learning_rate": 7.29723846943361e-07, "loss": 0.5275, "step": 4720 }, { "epoch": 1.78, "learning_rate": 7.272576476416182e-07, "loss": 0.5053, "step": 4721 }, { "epoch": 1.78, "learning_rate": 7.24795465554673e-07, "loss": 0.5222, "step": 4722 }, { "epoch": 1.78, "learning_rate": 7.223373017492152e-07, "loss": 0.5774, "step": 4723 }, { "epoch": 1.78, "learning_rate": 7.198831572901876e-07, "loss": 0.5264, "step": 4724 }, { "epoch": 1.78, "learning_rate": 7.174330332407997e-07, "loss": 0.5164, "step": 4725 }, { "epoch": 1.78, "learning_rate": 7.14986930662509e-07, "loss": 0.5598, "step": 4726 }, { "epoch": 1.78, "learning_rate": 7.12544850615039e-07, "loss": 0.5534, "step": 4727 }, { "epoch": 1.78, "learning_rate": 7.101067941563666e-07, "loss": 0.531, "step": 4728 }, { "epoch": 1.78, "learning_rate": 7.076727623427304e-07, "loss": 0.591, "step": 4729 }, { "epoch": 1.78, "learning_rate": 7.052427562286191e-07, "loss": 0.4479, "step": 4730 }, { "epoch": 1.78, "learning_rate": 7.028167768667815e-07, "loss": 0.5362, "step": 4731 }, { "epoch": 1.78, "learning_rate": 7.003948253082216e-07, "loss": 0.5079, "step": 4732 }, { "epoch": 1.78, "learning_rate": 6.979769026021943e-07, "loss": 0.5477, "step": 4733 }, { "epoch": 1.79, "learning_rate": 6.955630097962141e-07, "loss": 0.526, "step": 4734 }, { "epoch": 1.79, "learning_rate": 6.931531479360454e-07, "loss": 0.605, "step": 4735 }, { "epoch": 1.79, "learning_rate": 6.907473180657132e-07, "loss": 0.5779, "step": 4736 }, { "epoch": 1.79, "learning_rate": 6.883455212274892e-07, "loss": 0.5418, "step": 4737 }, { "epoch": 1.79, "learning_rate": 6.85947758461899e-07, "loss": 0.5742, "step": 4738 }, { "epoch": 1.79, "learning_rate": 6.835540308077215e-07, "loss": 0.5391, "step": 4739 }, { "epoch": 1.79, "learning_rate": 6.811643393019895e-07, "loss": 0.5875, "step": 4740 }, { "epoch": 1.79, "learning_rate": 6.787786849799804e-07, "loss": 0.5412, "step": 4741 }, { "epoch": 1.79, "learning_rate": 6.763970688752319e-07, "loss": 0.5442, "step": 4742 }, { "epoch": 1.79, "learning_rate": 6.740194920195264e-07, "loss": 0.5634, "step": 4743 }, { "epoch": 1.79, "learning_rate": 6.716459554428967e-07, "loss": 0.5405, "step": 4744 }, { "epoch": 1.79, "learning_rate": 6.692764601736268e-07, "loss": 0.5412, "step": 4745 }, { "epoch": 1.79, "learning_rate": 6.66911007238249e-07, "loss": 0.5235, "step": 4746 }, { "epoch": 1.79, "learning_rate": 6.645495976615435e-07, "loss": 0.545, "step": 4747 }, { "epoch": 1.79, "learning_rate": 6.62192232466542e-07, "loss": 0.5245, "step": 4748 }, { "epoch": 1.79, "learning_rate": 6.598389126745209e-07, "loss": 0.4927, "step": 4749 }, { "epoch": 1.79, "learning_rate": 6.574896393050034e-07, "loss": 0.5329, "step": 4750 }, { "epoch": 1.79, "learning_rate": 6.551444133757645e-07, "loss": 0.5334, "step": 4751 }, { "epoch": 1.79, "learning_rate": 6.528032359028202e-07, "loss": 0.5157, "step": 4752 }, { "epoch": 1.79, "learning_rate": 6.504661079004348e-07, "loss": 0.533, "step": 4753 }, { "epoch": 1.79, "learning_rate": 6.481330303811206e-07, "loss": 0.5407, "step": 4754 }, { "epoch": 1.79, "learning_rate": 6.458040043556302e-07, "loss": 0.5247, "step": 4755 }, { "epoch": 1.79, "learning_rate": 6.434790308329652e-07, "loss": 0.5555, "step": 4756 }, { "epoch": 1.79, "learning_rate": 6.411581108203713e-07, "loss": 0.533, "step": 4757 }, { "epoch": 1.79, "learning_rate": 6.388412453233339e-07, "loss": 0.5509, "step": 4758 }, { "epoch": 1.79, "learning_rate": 6.365284353455858e-07, "loss": 0.5337, "step": 4759 }, { "epoch": 1.79, "learning_rate": 6.342196818891033e-07, "loss": 0.5353, "step": 4760 }, { "epoch": 1.8, "learning_rate": 6.31914985954103e-07, "loss": 0.5153, "step": 4761 }, { "epoch": 1.8, "learning_rate": 6.296143485390438e-07, "loss": 0.5299, "step": 4762 }, { "epoch": 1.8, "learning_rate": 6.273177706406264e-07, "loss": 0.557, "step": 4763 }, { "epoch": 1.8, "learning_rate": 6.250252532537959e-07, "loss": 0.5401, "step": 4764 }, { "epoch": 1.8, "learning_rate": 6.227367973717347e-07, "loss": 0.5661, "step": 4765 }, { "epoch": 1.8, "learning_rate": 6.204524039858639e-07, "loss": 0.5627, "step": 4766 }, { "epoch": 1.8, "learning_rate": 6.181720740858521e-07, "loss": 0.531, "step": 4767 }, { "epoch": 1.8, "learning_rate": 6.158958086596023e-07, "loss": 0.4864, "step": 4768 }, { "epoch": 1.8, "learning_rate": 6.136236086932546e-07, "loss": 0.5439, "step": 4769 }, { "epoch": 1.8, "learning_rate": 6.113554751711914e-07, "loss": 0.5153, "step": 4770 }, { "epoch": 1.8, "learning_rate": 6.090914090760325e-07, "loss": 0.4698, "step": 4771 }, { "epoch": 1.8, "learning_rate": 6.068314113886343e-07, "loss": 0.5298, "step": 4772 }, { "epoch": 1.8, "learning_rate": 6.045754830880934e-07, "loss": 0.5429, "step": 4773 }, { "epoch": 1.8, "learning_rate": 6.023236251517429e-07, "loss": 0.5408, "step": 4774 }, { "epoch": 1.8, "learning_rate": 6.00075838555152e-07, "loss": 0.4946, "step": 4775 }, { "epoch": 1.8, "learning_rate": 5.978321242721207e-07, "loss": 0.526, "step": 4776 }, { "epoch": 1.8, "learning_rate": 5.95592483274694e-07, "loss": 0.526, "step": 4777 }, { "epoch": 1.8, "learning_rate": 5.933569165331454e-07, "loss": 0.5188, "step": 4778 }, { "epoch": 1.8, "learning_rate": 5.911254250159859e-07, "loss": 0.4896, "step": 4779 }, { "epoch": 1.8, "learning_rate": 5.888980096899643e-07, "loss": 0.5387, "step": 4780 }, { "epoch": 1.8, "learning_rate": 5.866746715200567e-07, "loss": 0.5582, "step": 4781 }, { "epoch": 1.8, "learning_rate": 5.844554114694767e-07, "loss": 0.5566, "step": 4782 }, { "epoch": 1.8, "learning_rate": 5.822402304996733e-07, "loss": 0.5779, "step": 4783 }, { "epoch": 1.8, "learning_rate": 5.80029129570322e-07, "loss": 0.523, "step": 4784 }, { "epoch": 1.8, "learning_rate": 5.778221096393333e-07, "loss": 0.539, "step": 4785 }, { "epoch": 1.8, "learning_rate": 5.756191716628556e-07, "loss": 0.5505, "step": 4786 }, { "epoch": 1.81, "learning_rate": 5.734203165952623e-07, "loss": 0.5926, "step": 4787 }, { "epoch": 1.81, "learning_rate": 5.71225545389158e-07, "loss": 0.5262, "step": 4788 }, { "epoch": 1.81, "learning_rate": 5.69034858995382e-07, "loss": 0.5593, "step": 4789 }, { "epoch": 1.81, "learning_rate": 5.66848258363002e-07, "loss": 0.4576, "step": 4790 }, { "epoch": 1.81, "learning_rate": 5.646657444393111e-07, "loss": 0.5551, "step": 4791 }, { "epoch": 1.81, "learning_rate": 5.62487318169841e-07, "loss": 0.5047, "step": 4792 }, { "epoch": 1.81, "learning_rate": 5.603129804983454e-07, "loss": 0.577, "step": 4793 }, { "epoch": 1.81, "learning_rate": 5.581427323668098e-07, "loss": 0.541, "step": 4794 }, { "epoch": 1.81, "learning_rate": 5.559765747154467e-07, "loss": 0.5677, "step": 4795 }, { "epoch": 1.81, "learning_rate": 5.538145084826974e-07, "loss": 0.535, "step": 4796 }, { "epoch": 1.81, "learning_rate": 5.516565346052305e-07, "loss": 0.5447, "step": 4797 }, { "epoch": 1.81, "learning_rate": 5.495026540179405e-07, "loss": 0.5208, "step": 4798 }, { "epoch": 1.81, "learning_rate": 5.473528676539497e-07, "loss": 0.5222, "step": 4799 }, { "epoch": 1.81, "learning_rate": 5.452071764446077e-07, "loss": 0.5538, "step": 4800 }, { "epoch": 1.81, "learning_rate": 5.430655813194874e-07, "loss": 0.519, "step": 4801 }, { "epoch": 1.81, "learning_rate": 5.40928083206389e-07, "loss": 0.6178, "step": 4802 }, { "epoch": 1.81, "learning_rate": 5.387946830313373e-07, "loss": 0.4759, "step": 4803 }, { "epoch": 1.81, "learning_rate": 5.366653817185819e-07, "loss": 0.5106, "step": 4804 }, { "epoch": 1.81, "learning_rate": 5.345401801905958e-07, "loss": 0.5489, "step": 4805 }, { "epoch": 1.81, "learning_rate": 5.324190793680773e-07, "loss": 0.5549, "step": 4806 }, { "epoch": 1.81, "learning_rate": 5.303020801699465e-07, "loss": 0.5371, "step": 4807 }, { "epoch": 1.81, "learning_rate": 5.281891835133479e-07, "loss": 0.5664, "step": 4808 }, { "epoch": 1.81, "learning_rate": 5.260803903136491e-07, "loss": 0.5197, "step": 4809 }, { "epoch": 1.81, "learning_rate": 5.239757014844371e-07, "loss": 0.5433, "step": 4810 }, { "epoch": 1.81, "learning_rate": 5.218751179375259e-07, "loss": 0.5508, "step": 4811 }, { "epoch": 1.81, "learning_rate": 5.197786405829452e-07, "loss": 0.5567, "step": 4812 }, { "epoch": 1.81, "learning_rate": 5.176862703289487e-07, "loss": 0.5545, "step": 4813 }, { "epoch": 1.82, "learning_rate": 5.155980080820111e-07, "loss": 0.538, "step": 4814 }, { "epoch": 1.82, "learning_rate": 5.135138547468266e-07, "loss": 0.551, "step": 4815 }, { "epoch": 1.82, "learning_rate": 5.114338112263084e-07, "loss": 0.5533, "step": 4816 }, { "epoch": 1.82, "learning_rate": 5.093578784215925e-07, "loss": 0.6057, "step": 4817 }, { "epoch": 1.82, "learning_rate": 5.072860572320315e-07, "loss": 0.5409, "step": 4818 }, { "epoch": 1.82, "learning_rate": 5.052183485551964e-07, "loss": 0.4913, "step": 4819 }, { "epoch": 1.82, "learning_rate": 5.031547532868752e-07, "loss": 0.5387, "step": 4820 }, { "epoch": 1.82, "learning_rate": 5.010952723210793e-07, "loss": 0.5367, "step": 4821 }, { "epoch": 1.82, "learning_rate": 4.990399065500318e-07, "loss": 0.5054, "step": 4822 }, { "epoch": 1.82, "learning_rate": 4.969886568641757e-07, "loss": 0.5556, "step": 4823 }, { "epoch": 1.82, "learning_rate": 4.949415241521716e-07, "loss": 0.5433, "step": 4824 }, { "epoch": 1.82, "learning_rate": 4.928985093008953e-07, "loss": 0.4992, "step": 4825 }, { "epoch": 1.82, "learning_rate": 4.908596131954402e-07, "loss": 0.4852, "step": 4826 }, { "epoch": 1.82, "learning_rate": 4.888248367191096e-07, "loss": 0.5835, "step": 4827 }, { "epoch": 1.82, "learning_rate": 4.867941807534294e-07, "loss": 0.5075, "step": 4828 }, { "epoch": 1.82, "learning_rate": 4.847676461781348e-07, "loss": 0.5147, "step": 4829 }, { "epoch": 1.82, "learning_rate": 4.827452338711824e-07, "loss": 0.5154, "step": 4830 }, { "epoch": 1.82, "learning_rate": 4.807269447087348e-07, "loss": 0.5449, "step": 4831 }, { "epoch": 1.82, "learning_rate": 4.787127795651736e-07, "loss": 0.5143, "step": 4832 }, { "epoch": 1.82, "learning_rate": 4.76702739313093e-07, "loss": 0.5532, "step": 4833 }, { "epoch": 1.82, "learning_rate": 4.746968248232975e-07, "loss": 0.5293, "step": 4834 }, { "epoch": 1.82, "learning_rate": 4.7269503696480534e-07, "loss": 0.5549, "step": 4835 }, { "epoch": 1.82, "learning_rate": 4.706973766048506e-07, "loss": 0.5067, "step": 4836 }, { "epoch": 1.82, "learning_rate": 4.687038446088754e-07, "loss": 0.5405, "step": 4837 }, { "epoch": 1.82, "learning_rate": 4.667144418405345e-07, "loss": 0.5202, "step": 4838 }, { "epoch": 1.82, "learning_rate": 4.6472916916169177e-07, "loss": 0.5245, "step": 4839 }, { "epoch": 1.83, "learning_rate": 4.6274802743242586e-07, "loss": 0.5001, "step": 4840 }, { "epoch": 1.83, "learning_rate": 4.607710175110236e-07, "loss": 0.5731, "step": 4841 }, { "epoch": 1.83, "learning_rate": 4.587981402539776e-07, "loss": 0.5272, "step": 4842 }, { "epoch": 1.83, "learning_rate": 4.568293965159998e-07, "loss": 0.5277, "step": 4843 }, { "epoch": 1.83, "learning_rate": 4.5486478715000227e-07, "loss": 0.573, "step": 4844 }, { "epoch": 1.83, "learning_rate": 4.529043130071109e-07, "loss": 0.5546, "step": 4845 }, { "epoch": 1.83, "learning_rate": 4.509479749366574e-07, "loss": 0.525, "step": 4846 }, { "epoch": 1.83, "learning_rate": 4.48995773786185e-07, "loss": 0.538, "step": 4847 }, { "epoch": 1.83, "learning_rate": 4.4704771040144033e-07, "loss": 0.5137, "step": 4848 }, { "epoch": 1.83, "learning_rate": 4.4510378562638065e-07, "loss": 0.5669, "step": 4849 }, { "epoch": 1.83, "learning_rate": 4.43164000303169e-07, "loss": 0.5598, "step": 4850 }, { "epoch": 1.83, "learning_rate": 4.4122835527217636e-07, "loss": 0.5493, "step": 4851 }, { "epoch": 1.83, "learning_rate": 4.392968513719775e-07, "loss": 0.59, "step": 4852 }, { "epoch": 1.83, "learning_rate": 4.373694894393565e-07, "loss": 0.5108, "step": 4853 }, { "epoch": 1.83, "learning_rate": 4.3544627030929876e-07, "loss": 0.5555, "step": 4854 }, { "epoch": 1.83, "learning_rate": 4.3352719481499883e-07, "loss": 0.5242, "step": 4855 }, { "epoch": 1.83, "learning_rate": 4.3161226378785503e-07, "loss": 0.5067, "step": 4856 }, { "epoch": 1.83, "learning_rate": 4.297014780574682e-07, "loss": 0.5625, "step": 4857 }, { "epoch": 1.83, "learning_rate": 4.2779483845164725e-07, "loss": 0.4657, "step": 4858 }, { "epoch": 1.83, "learning_rate": 4.2589234579639927e-07, "loss": 0.5544, "step": 4859 }, { "epoch": 1.83, "learning_rate": 4.2399400091594154e-07, "loss": 0.5425, "step": 4860 }, { "epoch": 1.83, "learning_rate": 4.2209980463268743e-07, "loss": 0.5053, "step": 4861 }, { "epoch": 1.83, "learning_rate": 4.2020975776725927e-07, "loss": 0.5589, "step": 4862 }, { "epoch": 1.83, "learning_rate": 4.183238611384777e-07, "loss": 0.5079, "step": 4863 }, { "epoch": 1.83, "learning_rate": 4.164421155633658e-07, "loss": 0.5553, "step": 4864 }, { "epoch": 1.83, "learning_rate": 4.1456452185715035e-07, "loss": 0.5451, "step": 4865 }, { "epoch": 1.83, "learning_rate": 4.1269108083325736e-07, "loss": 0.5356, "step": 4866 }, { "epoch": 1.84, "learning_rate": 4.108217933033143e-07, "loss": 0.4866, "step": 4867 }, { "epoch": 1.84, "learning_rate": 4.089566600771522e-07, "loss": 0.5068, "step": 4868 }, { "epoch": 1.84, "learning_rate": 4.0709568196279804e-07, "loss": 0.5227, "step": 4869 }, { "epoch": 1.84, "learning_rate": 4.0523885976647803e-07, "loss": 0.5191, "step": 4870 }, { "epoch": 1.84, "learning_rate": 4.033861942926232e-07, "loss": 0.5036, "step": 4871 }, { "epoch": 1.84, "learning_rate": 4.0153768634385806e-07, "loss": 0.5377, "step": 4872 }, { "epoch": 1.84, "learning_rate": 3.9969333672100983e-07, "loss": 0.5142, "step": 4873 }, { "epoch": 1.84, "learning_rate": 3.97853146223105e-07, "loss": 0.5365, "step": 4874 }, { "epoch": 1.84, "learning_rate": 3.9601711564736355e-07, "loss": 0.5408, "step": 4875 }, { "epoch": 1.84, "learning_rate": 3.9418524578920926e-07, "loss": 0.5013, "step": 4876 }, { "epoch": 1.84, "learning_rate": 3.9235753744225614e-07, "loss": 0.5695, "step": 4877 }, { "epoch": 1.84, "learning_rate": 3.905339913983208e-07, "loss": 0.4772, "step": 4878 }, { "epoch": 1.84, "learning_rate": 3.887146084474147e-07, "loss": 0.5593, "step": 4879 }, { "epoch": 1.84, "learning_rate": 3.8689938937774727e-07, "loss": 0.5496, "step": 4880 }, { "epoch": 1.84, "learning_rate": 3.850883349757217e-07, "loss": 0.512, "step": 4881 }, { "epoch": 1.84, "learning_rate": 3.832814460259393e-07, "loss": 0.4923, "step": 4882 }, { "epoch": 1.84, "learning_rate": 3.8147872331119384e-07, "loss": 0.4935, "step": 4883 }, { "epoch": 1.84, "learning_rate": 3.796801676124795e-07, "loss": 0.5011, "step": 4884 }, { "epoch": 1.84, "learning_rate": 3.778857797089763e-07, "loss": 0.5899, "step": 4885 }, { "epoch": 1.84, "learning_rate": 3.7609556037806694e-07, "loss": 0.5525, "step": 4886 }, { "epoch": 1.84, "learning_rate": 3.7430951039532535e-07, "loss": 0.5501, "step": 4887 }, { "epoch": 1.84, "learning_rate": 3.7252763053451823e-07, "loss": 0.5944, "step": 4888 }, { "epoch": 1.84, "learning_rate": 3.7074992156760693e-07, "loss": 0.5406, "step": 4889 }, { "epoch": 1.84, "learning_rate": 3.6897638426474536e-07, "loss": 0.5565, "step": 4890 }, { "epoch": 1.84, "learning_rate": 3.6720701939428113e-07, "loss": 0.5013, "step": 4891 }, { "epoch": 1.84, "learning_rate": 3.6544182772274895e-07, "loss": 0.5261, "step": 4892 }, { "epoch": 1.85, "learning_rate": 3.6368081001488476e-07, "loss": 0.5933, "step": 4893 }, { "epoch": 1.85, "learning_rate": 3.619239670336094e-07, "loss": 0.5082, "step": 4894 }, { "epoch": 1.85, "learning_rate": 3.601712995400375e-07, "loss": 0.573, "step": 4895 }, { "epoch": 1.85, "learning_rate": 3.5842280829347375e-07, "loss": 0.5457, "step": 4896 }, { "epoch": 1.85, "learning_rate": 3.566784940514145e-07, "loss": 0.5366, "step": 4897 }, { "epoch": 1.85, "learning_rate": 3.549383575695453e-07, "loss": 0.6089, "step": 4898 }, { "epoch": 1.85, "learning_rate": 3.532023996017442e-07, "loss": 0.5944, "step": 4899 }, { "epoch": 1.85, "learning_rate": 3.514706209000762e-07, "loss": 0.5209, "step": 4900 }, { "epoch": 1.85, "learning_rate": 3.497430222147968e-07, "loss": 0.5472, "step": 4901 }, { "epoch": 1.85, "learning_rate": 3.480196042943518e-07, "loss": 0.565, "step": 4902 }, { "epoch": 1.85, "learning_rate": 3.463003678853738e-07, "loss": 0.5274, "step": 4903 }, { "epoch": 1.85, "learning_rate": 3.445853137326838e-07, "loss": 0.5609, "step": 4904 }, { "epoch": 1.85, "learning_rate": 3.4287444257929406e-07, "loss": 0.5605, "step": 4905 }, { "epoch": 1.85, "learning_rate": 3.4116775516640167e-07, "loss": 0.5492, "step": 4906 }, { "epoch": 1.85, "learning_rate": 3.3946525223339076e-07, "loss": 0.5534, "step": 4907 }, { "epoch": 1.85, "learning_rate": 3.377669345178358e-07, "loss": 0.4449, "step": 4908 }, { "epoch": 1.85, "learning_rate": 3.360728027554949e-07, "loss": 0.5578, "step": 4909 }, { "epoch": 1.85, "learning_rate": 3.343828576803132e-07, "loss": 0.5348, "step": 4910 }, { "epoch": 1.85, "learning_rate": 3.326971000244239e-07, "loss": 0.5289, "step": 4911 }, { "epoch": 1.85, "learning_rate": 3.3101553051814614e-07, "loss": 0.5592, "step": 4912 }, { "epoch": 1.85, "learning_rate": 3.2933814988998283e-07, "loss": 0.5432, "step": 4913 }, { "epoch": 1.85, "learning_rate": 3.276649588666203e-07, "loss": 0.5118, "step": 4914 }, { "epoch": 1.85, "learning_rate": 3.259959581729355e-07, "loss": 0.5558, "step": 4915 }, { "epoch": 1.85, "learning_rate": 3.243311485319844e-07, "loss": 0.5108, "step": 4916 }, { "epoch": 1.85, "learning_rate": 3.226705306650113e-07, "loss": 0.5602, "step": 4917 }, { "epoch": 1.85, "learning_rate": 3.210141052914428e-07, "loss": 0.5866, "step": 4918 }, { "epoch": 1.85, "learning_rate": 3.1936187312889056e-07, "loss": 0.5545, "step": 4919 }, { "epoch": 1.86, "learning_rate": 3.177138348931452e-07, "loss": 0.5762, "step": 4920 }, { "epoch": 1.86, "learning_rate": 3.1606999129818574e-07, "loss": 0.4681, "step": 4921 }, { "epoch": 1.86, "learning_rate": 3.1443034305617127e-07, "loss": 0.5622, "step": 4922 }, { "epoch": 1.86, "learning_rate": 3.1279489087744364e-07, "loss": 0.5395, "step": 4923 }, { "epoch": 1.86, "learning_rate": 3.111636354705261e-07, "loss": 0.5031, "step": 4924 }, { "epoch": 1.86, "learning_rate": 3.0953657754212774e-07, "loss": 0.5005, "step": 4925 }, { "epoch": 1.86, "learning_rate": 3.0791371779713365e-07, "loss": 0.511, "step": 4926 }, { "epoch": 1.86, "learning_rate": 3.0629505693861584e-07, "loss": 0.4981, "step": 4927 }, { "epoch": 1.86, "learning_rate": 3.0468059566782003e-07, "loss": 0.5622, "step": 4928 }, { "epoch": 1.86, "learning_rate": 3.030703346841779e-07, "loss": 0.5768, "step": 4929 }, { "epoch": 1.86, "learning_rate": 3.0146427468529915e-07, "loss": 0.5019, "step": 4930 }, { "epoch": 1.86, "learning_rate": 2.998624163669783e-07, "loss": 0.5539, "step": 4931 }, { "epoch": 1.86, "learning_rate": 2.982647604231825e-07, "loss": 0.5015, "step": 4932 }, { "epoch": 1.86, "learning_rate": 2.9667130754606255e-07, "loss": 0.5712, "step": 4933 }, { "epoch": 1.86, "learning_rate": 2.9508205842594727e-07, "loss": 0.5258, "step": 4934 }, { "epoch": 1.86, "learning_rate": 2.934970137513449e-07, "loss": 0.5389, "step": 4935 }, { "epoch": 1.86, "learning_rate": 2.9191617420893934e-07, "loss": 0.49, "step": 4936 }, { "epoch": 1.86, "learning_rate": 2.903395404835985e-07, "loss": 0.5608, "step": 4937 }, { "epoch": 1.86, "learning_rate": 2.887671132583625e-07, "loss": 0.5179, "step": 4938 }, { "epoch": 1.86, "learning_rate": 2.8719889321445316e-07, "loss": 0.536, "step": 4939 }, { "epoch": 1.86, "learning_rate": 2.856348810312659e-07, "loss": 0.5377, "step": 4940 }, { "epoch": 1.86, "learning_rate": 2.8407507738637765e-07, "loss": 0.5693, "step": 4941 }, { "epoch": 1.86, "learning_rate": 2.8251948295553664e-07, "loss": 0.5485, "step": 4942 }, { "epoch": 1.86, "learning_rate": 2.809680984126717e-07, "loss": 0.5098, "step": 4943 }, { "epoch": 1.86, "learning_rate": 2.7942092442988734e-07, "loss": 0.5536, "step": 4944 }, { "epoch": 1.86, "learning_rate": 2.7787796167746183e-07, "loss": 0.5099, "step": 4945 }, { "epoch": 1.87, "learning_rate": 2.763392108238516e-07, "loss": 0.5588, "step": 4946 }, { "epoch": 1.87, "learning_rate": 2.748046725356868e-07, "loss": 0.4908, "step": 4947 }, { "epoch": 1.87, "learning_rate": 2.7327434747777217e-07, "loss": 0.5478, "step": 4948 }, { "epoch": 1.87, "learning_rate": 2.7174823631308856e-07, "loss": 0.519, "step": 4949 }, { "epoch": 1.87, "learning_rate": 2.7022633970279045e-07, "loss": 0.5839, "step": 4950 }, { "epoch": 1.87, "learning_rate": 2.6870865830620705e-07, "loss": 0.5629, "step": 4951 }, { "epoch": 1.87, "learning_rate": 2.6719519278084027e-07, "loss": 0.489, "step": 4952 }, { "epoch": 1.87, "learning_rate": 2.656859437823656e-07, "loss": 0.5443, "step": 4953 }, { "epoch": 1.87, "learning_rate": 2.6418091196463436e-07, "loss": 0.5238, "step": 4954 }, { "epoch": 1.87, "learning_rate": 2.626800979796684e-07, "loss": 0.5164, "step": 4955 }, { "epoch": 1.87, "learning_rate": 2.61183502477661e-07, "loss": 0.58, "step": 4956 }, { "epoch": 1.87, "learning_rate": 2.5969112610698235e-07, "loss": 0.5506, "step": 4957 }, { "epoch": 1.87, "learning_rate": 2.582029695141708e-07, "loss": 0.5952, "step": 4958 }, { "epoch": 1.87, "learning_rate": 2.5671903334393844e-07, "loss": 0.5115, "step": 4959 }, { "epoch": 1.87, "learning_rate": 2.552393182391677e-07, "loss": 0.6085, "step": 4960 }, { "epoch": 1.87, "learning_rate": 2.537638248409136e-07, "loss": 0.5557, "step": 4961 }, { "epoch": 1.87, "learning_rate": 2.5229255378840354e-07, "loss": 0.5688, "step": 4962 }, { "epoch": 1.87, "learning_rate": 2.508255057190301e-07, "loss": 0.4693, "step": 4963 }, { "epoch": 1.87, "learning_rate": 2.493626812683636e-07, "loss": 0.5458, "step": 4964 }, { "epoch": 1.87, "learning_rate": 2.479040810701383e-07, "loss": 0.5594, "step": 4965 }, { "epoch": 1.87, "learning_rate": 2.464497057562631e-07, "loss": 0.5137, "step": 4966 }, { "epoch": 1.87, "learning_rate": 2.4499955595681303e-07, "loss": 0.516, "step": 4967 }, { "epoch": 1.87, "learning_rate": 2.4355363230003424e-07, "loss": 0.5774, "step": 4968 }, { "epoch": 1.87, "learning_rate": 2.4211193541234466e-07, "loss": 0.5288, "step": 4969 }, { "epoch": 1.87, "learning_rate": 2.406744659183247e-07, "loss": 0.5402, "step": 4970 }, { "epoch": 1.87, "learning_rate": 2.392412244407294e-07, "loss": 0.5197, "step": 4971 }, { "epoch": 1.87, "learning_rate": 2.3781221160047773e-07, "loss": 0.5413, "step": 4972 }, { "epoch": 1.88, "learning_rate": 2.3638742801665892e-07, "loss": 0.5488, "step": 4973 }, { "epoch": 1.88, "learning_rate": 2.3496687430652921e-07, "loss": 0.5446, "step": 4974 }, { "epoch": 1.88, "learning_rate": 2.3355055108551516e-07, "loss": 0.5378, "step": 4975 }, { "epoch": 1.88, "learning_rate": 2.32138458967206e-07, "loss": 0.5779, "step": 4976 }, { "epoch": 1.88, "learning_rate": 2.3073059856336122e-07, "loss": 0.5586, "step": 4977 }, { "epoch": 1.88, "learning_rate": 2.2932697048390407e-07, "loss": 0.5425, "step": 4978 }, { "epoch": 1.88, "learning_rate": 2.27927575336927e-07, "loss": 0.4972, "step": 4979 }, { "epoch": 1.88, "learning_rate": 2.2653241372868727e-07, "loss": 0.4714, "step": 4980 }, { "epoch": 1.88, "learning_rate": 2.2514148626361032e-07, "loss": 0.5851, "step": 4981 }, { "epoch": 1.88, "learning_rate": 2.23754793544283e-07, "loss": 0.5021, "step": 4982 }, { "epoch": 1.88, "learning_rate": 2.2237233617146136e-07, "loss": 0.5029, "step": 4983 }, { "epoch": 1.88, "learning_rate": 2.2099411474406528e-07, "loss": 0.5089, "step": 4984 }, { "epoch": 1.88, "learning_rate": 2.1962012985917826e-07, "loss": 0.4918, "step": 4985 }, { "epoch": 1.88, "learning_rate": 2.1825038211204964e-07, "loss": 0.5538, "step": 4986 }, { "epoch": 1.88, "learning_rate": 2.1688487209609254e-07, "loss": 0.5047, "step": 4987 }, { "epoch": 1.88, "learning_rate": 2.15523600402886e-07, "loss": 0.5317, "step": 4988 }, { "epoch": 1.88, "learning_rate": 2.1416656762217048e-07, "loss": 0.5037, "step": 4989 }, { "epoch": 1.88, "learning_rate": 2.1281377434185124e-07, "loss": 0.5095, "step": 4990 }, { "epoch": 1.88, "learning_rate": 2.1146522114799728e-07, "loss": 0.5397, "step": 4991 }, { "epoch": 1.88, "learning_rate": 2.1012090862484013e-07, "loss": 0.5065, "step": 4992 }, { "epoch": 1.88, "learning_rate": 2.0878083735477172e-07, "loss": 0.5732, "step": 4993 }, { "epoch": 1.88, "learning_rate": 2.07445007918351e-07, "loss": 0.5503, "step": 4994 }, { "epoch": 1.88, "learning_rate": 2.0611342089429831e-07, "loss": 0.556, "step": 4995 }, { "epoch": 1.88, "learning_rate": 2.0478607685949225e-07, "loss": 0.5137, "step": 4996 }, { "epoch": 1.88, "learning_rate": 2.0346297638897727e-07, "loss": 0.4896, "step": 4997 }, { "epoch": 1.88, "learning_rate": 2.0214412005595817e-07, "loss": 0.5393, "step": 4998 }, { "epoch": 1.88, "learning_rate": 2.0082950843180016e-07, "loss": 0.5314, "step": 4999 }, { "epoch": 1.89, "learning_rate": 1.9951914208602984e-07, "loss": 0.5462, "step": 5000 }, { "epoch": 1.89, "learning_rate": 1.9821302158633648e-07, "loss": 0.527, "step": 5001 }, { "epoch": 1.89, "learning_rate": 1.9691114749856856e-07, "loss": 0.5237, "step": 5002 }, { "epoch": 1.89, "learning_rate": 1.9561352038673264e-07, "loss": 0.5296, "step": 5003 }, { "epoch": 1.89, "learning_rate": 1.943201408130002e-07, "loss": 0.5613, "step": 5004 }, { "epoch": 1.89, "learning_rate": 1.9303100933769857e-07, "loss": 0.5158, "step": 5005 }, { "epoch": 1.89, "learning_rate": 1.9174612651931546e-07, "loss": 0.5355, "step": 5006 }, { "epoch": 1.89, "learning_rate": 1.904654929145e-07, "loss": 0.5405, "step": 5007 }, { "epoch": 1.89, "learning_rate": 1.8918910907805733e-07, "loss": 0.5421, "step": 5008 }, { "epoch": 1.89, "learning_rate": 1.8791697556295508e-07, "loss": 0.5202, "step": 5009 }, { "epoch": 1.89, "learning_rate": 1.8664909292031573e-07, "loss": 0.4987, "step": 5010 }, { "epoch": 1.89, "learning_rate": 1.853854616994233e-07, "loss": 0.5022, "step": 5011 }, { "epoch": 1.89, "learning_rate": 1.8412608244771758e-07, "loss": 0.5371, "step": 5012 }, { "epoch": 1.89, "learning_rate": 1.8287095571079772e-07, "loss": 0.5027, "step": 5013 }, { "epoch": 1.89, "learning_rate": 1.81620082032421e-07, "loss": 0.5707, "step": 5014 }, { "epoch": 1.89, "learning_rate": 1.8037346195449946e-07, "loss": 0.5608, "step": 5015 }, { "epoch": 1.89, "learning_rate": 1.7913109601710665e-07, "loss": 0.5103, "step": 5016 }, { "epoch": 1.89, "learning_rate": 1.7789298475846873e-07, "loss": 0.5093, "step": 5017 }, { "epoch": 1.89, "learning_rate": 1.7665912871497104e-07, "loss": 0.4893, "step": 5018 }, { "epoch": 1.89, "learning_rate": 1.7542952842115602e-07, "loss": 0.4929, "step": 5019 }, { "epoch": 1.89, "learning_rate": 1.7420418440972198e-07, "loss": 0.5361, "step": 5020 }, { "epoch": 1.89, "learning_rate": 1.7298309721151984e-07, "loss": 0.5734, "step": 5021 }, { "epoch": 1.89, "learning_rate": 1.717662673555609e-07, "loss": 0.5227, "step": 5022 }, { "epoch": 1.89, "learning_rate": 1.7055369536901013e-07, "loss": 0.5111, "step": 5023 }, { "epoch": 1.89, "learning_rate": 1.693453817771873e-07, "loss": 0.5449, "step": 5024 }, { "epoch": 1.89, "learning_rate": 1.6814132710356922e-07, "loss": 0.5575, "step": 5025 }, { "epoch": 1.9, "learning_rate": 1.6694153186978646e-07, "loss": 0.5409, "step": 5026 }, { "epoch": 1.9, "learning_rate": 1.657459965956254e-07, "loss": 0.5317, "step": 5027 }, { "epoch": 1.9, "learning_rate": 1.6455472179902288e-07, "loss": 0.4947, "step": 5028 }, { "epoch": 1.9, "learning_rate": 1.6336770799607494e-07, "loss": 0.5307, "step": 5029 }, { "epoch": 1.9, "learning_rate": 1.621849557010291e-07, "loss": 0.5669, "step": 5030 }, { "epoch": 1.9, "learning_rate": 1.6100646542628661e-07, "loss": 0.5472, "step": 5031 }, { "epoch": 1.9, "learning_rate": 1.5983223768240353e-07, "loss": 0.5478, "step": 5032 }, { "epoch": 1.9, "learning_rate": 1.5866227297808957e-07, "loss": 0.4827, "step": 5033 }, { "epoch": 1.9, "learning_rate": 1.5749657182020482e-07, "loss": 0.5735, "step": 5034 }, { "epoch": 1.9, "learning_rate": 1.5633513471376538e-07, "loss": 0.5442, "step": 5035 }, { "epoch": 1.9, "learning_rate": 1.551779621619376e-07, "loss": 0.5794, "step": 5036 }, { "epoch": 1.9, "learning_rate": 1.5402505466604156e-07, "loss": 0.5222, "step": 5037 }, { "epoch": 1.9, "learning_rate": 1.5287641272554998e-07, "loss": 0.5462, "step": 5038 }, { "epoch": 1.9, "learning_rate": 1.5173203683808812e-07, "loss": 0.51, "step": 5039 }, { "epoch": 1.9, "learning_rate": 1.5059192749942942e-07, "loss": 0.5512, "step": 5040 }, { "epoch": 1.9, "learning_rate": 1.494560852035043e-07, "loss": 0.5488, "step": 5041 }, { "epoch": 1.9, "learning_rate": 1.4832451044239027e-07, "loss": 0.5042, "step": 5042 }, { "epoch": 1.9, "learning_rate": 1.4719720370631741e-07, "loss": 0.5357, "step": 5043 }, { "epoch": 1.9, "learning_rate": 1.4607416548366725e-07, "loss": 0.5527, "step": 5044 }, { "epoch": 1.9, "learning_rate": 1.4495539626097289e-07, "loss": 0.5464, "step": 5045 }, { "epoch": 1.9, "learning_rate": 1.4384089652291544e-07, "loss": 0.544, "step": 5046 }, { "epoch": 1.9, "learning_rate": 1.4273066675232872e-07, "loss": 0.5123, "step": 5047 }, { "epoch": 1.9, "learning_rate": 1.4162470743019464e-07, "loss": 0.5566, "step": 5048 }, { "epoch": 1.9, "learning_rate": 1.4052301903564768e-07, "loss": 0.5454, "step": 5049 }, { "epoch": 1.9, "learning_rate": 1.3942560204596833e-07, "loss": 0.5405, "step": 5050 }, { "epoch": 1.9, "learning_rate": 1.383324569365907e-07, "loss": 0.5683, "step": 5051 }, { "epoch": 1.9, "learning_rate": 1.3724358418109597e-07, "loss": 0.5587, "step": 5052 }, { "epoch": 1.91, "learning_rate": 1.3615898425121345e-07, "loss": 0.543, "step": 5053 }, { "epoch": 1.91, "learning_rate": 1.3507865761682394e-07, "loss": 0.533, "step": 5054 }, { "epoch": 1.91, "learning_rate": 1.3400260474595415e-07, "loss": 0.5434, "step": 5055 }, { "epoch": 1.91, "learning_rate": 1.3293082610478236e-07, "loss": 0.6047, "step": 5056 }, { "epoch": 1.91, "learning_rate": 1.3186332215763153e-07, "loss": 0.522, "step": 5057 }, { "epoch": 1.91, "learning_rate": 1.308000933669773e-07, "loss": 0.5049, "step": 5058 }, { "epoch": 1.91, "learning_rate": 1.2974114019343787e-07, "loss": 0.52, "step": 5059 }, { "epoch": 1.91, "learning_rate": 1.286864630957829e-07, "loss": 0.5938, "step": 5060 }, { "epoch": 1.91, "learning_rate": 1.276360625309281e-07, "loss": 0.5553, "step": 5061 }, { "epoch": 1.91, "learning_rate": 1.26589938953936e-07, "loss": 0.474, "step": 5062 }, { "epoch": 1.91, "learning_rate": 1.255480928180197e-07, "loss": 0.4691, "step": 5063 }, { "epoch": 1.91, "learning_rate": 1.2451052457453484e-07, "loss": 0.5304, "step": 5064 }, { "epoch": 1.91, "learning_rate": 1.23477234672984e-07, "loss": 0.5161, "step": 5065 }, { "epoch": 1.91, "learning_rate": 1.2244822356102026e-07, "loss": 0.5411, "step": 5066 }, { "epoch": 1.91, "learning_rate": 1.214234916844381e-07, "loss": 0.5519, "step": 5067 }, { "epoch": 1.91, "learning_rate": 1.2040303948718024e-07, "loss": 0.5327, "step": 5068 }, { "epoch": 1.91, "learning_rate": 1.1938686741133852e-07, "loss": 0.5402, "step": 5069 }, { "epoch": 1.91, "learning_rate": 1.1837497589714531e-07, "loss": 0.5808, "step": 5070 }, { "epoch": 1.91, "learning_rate": 1.1736736538298099e-07, "loss": 0.5831, "step": 5071 }, { "epoch": 1.91, "learning_rate": 1.1636403630537086e-07, "loss": 0.516, "step": 5072 }, { "epoch": 1.91, "learning_rate": 1.1536498909898497e-07, "loss": 0.5337, "step": 5073 }, { "epoch": 1.91, "learning_rate": 1.1437022419664046e-07, "loss": 0.5846, "step": 5074 }, { "epoch": 1.91, "learning_rate": 1.1337974202929591e-07, "loss": 0.5206, "step": 5075 }, { "epoch": 1.91, "learning_rate": 1.1239354302605699e-07, "loss": 0.5131, "step": 5076 }, { "epoch": 1.91, "learning_rate": 1.1141162761417412e-07, "loss": 0.5633, "step": 5077 }, { "epoch": 1.91, "learning_rate": 1.1043399621904038e-07, "loss": 0.5765, "step": 5078 }, { "epoch": 1.92, "learning_rate": 1.0946064926419253e-07, "loss": 0.5231, "step": 5079 }, { "epoch": 1.92, "learning_rate": 1.0849158717131214e-07, "loss": 0.5655, "step": 5080 }, { "epoch": 1.92, "learning_rate": 1.0752681036022339e-07, "loss": 0.4896, "step": 5081 }, { "epoch": 1.92, "learning_rate": 1.0656631924889749e-07, "loss": 0.5227, "step": 5082 }, { "epoch": 1.92, "learning_rate": 1.0561011425344603e-07, "loss": 0.4699, "step": 5083 }, { "epoch": 1.92, "learning_rate": 1.0465819578812209e-07, "loss": 0.4996, "step": 5084 }, { "epoch": 1.92, "learning_rate": 1.0371056426532689e-07, "loss": 0.5179, "step": 5085 }, { "epoch": 1.92, "learning_rate": 1.0276722009559758e-07, "loss": 0.5401, "step": 5086 }, { "epoch": 1.92, "learning_rate": 1.0182816368761949e-07, "loss": 0.5779, "step": 5087 }, { "epoch": 1.92, "learning_rate": 1.008933954482183e-07, "loss": 0.5334, "step": 5088 }, { "epoch": 1.92, "learning_rate": 9.996291578236228e-08, "loss": 0.5423, "step": 5089 }, { "epoch": 1.92, "learning_rate": 9.903672509316121e-08, "loss": 0.5236, "step": 5090 }, { "epoch": 1.92, "learning_rate": 9.811482378186743e-08, "loss": 0.5749, "step": 5091 }, { "epoch": 1.92, "learning_rate": 9.719721224787482e-08, "loss": 0.5373, "step": 5092 }, { "epoch": 1.92, "learning_rate": 9.62838908887187e-08, "loss": 0.5421, "step": 5093 }, { "epoch": 1.92, "learning_rate": 9.53748601000748e-08, "loss": 0.5318, "step": 5094 }, { "epoch": 1.92, "learning_rate": 9.447012027576252e-08, "loss": 0.5512, "step": 5095 }, { "epoch": 1.92, "learning_rate": 9.356967180774057e-08, "loss": 0.488, "step": 5096 }, { "epoch": 1.92, "learning_rate": 9.26735150861069e-08, "loss": 0.5204, "step": 5097 }, { "epoch": 1.92, "learning_rate": 9.178165049910426e-08, "loss": 0.5704, "step": 5098 }, { "epoch": 1.92, "learning_rate": 9.089407843311248e-08, "loss": 0.5294, "step": 5099 }, { "epoch": 1.92, "learning_rate": 9.001079927265399e-08, "loss": 0.5045, "step": 5100 }, { "epoch": 1.92, "learning_rate": 8.913181340038823e-08, "loss": 0.475, "step": 5101 }, { "epoch": 1.92, "learning_rate": 8.825712119711949e-08, "loss": 0.5696, "step": 5102 }, { "epoch": 1.92, "learning_rate": 8.738672304178686e-08, "loss": 0.5254, "step": 5103 }, { "epoch": 1.92, "learning_rate": 8.652061931147316e-08, "loss": 0.5599, "step": 5104 }, { "epoch": 1.92, "learning_rate": 8.565881038139822e-08, "loss": 0.5654, "step": 5105 }, { "epoch": 1.93, "learning_rate": 8.480129662492231e-08, "loss": 0.5545, "step": 5106 }, { "epoch": 1.93, "learning_rate": 8.394807841354491e-08, "loss": 0.5137, "step": 5107 }, { "epoch": 1.93, "learning_rate": 8.309915611690367e-08, "loss": 0.5346, "step": 5108 }, { "epoch": 1.93, "learning_rate": 8.225453010277662e-08, "loss": 0.5865, "step": 5109 }, { "epoch": 1.93, "learning_rate": 8.141420073707885e-08, "loss": 0.6017, "step": 5110 }, { "epoch": 1.93, "learning_rate": 8.057816838386579e-08, "loss": 0.5135, "step": 5111 }, { "epoch": 1.93, "learning_rate": 7.974643340532884e-08, "loss": 0.4637, "step": 5112 }, { "epoch": 1.93, "learning_rate": 7.891899616180199e-08, "loss": 0.5193, "step": 5113 }, { "epoch": 1.93, "learning_rate": 7.809585701175183e-08, "loss": 0.4907, "step": 5114 }, { "epoch": 1.93, "learning_rate": 7.727701631178641e-08, "loss": 0.5883, "step": 5115 }, { "epoch": 1.93, "learning_rate": 7.646247441665089e-08, "loss": 0.4568, "step": 5116 }, { "epoch": 1.93, "learning_rate": 7.565223167922742e-08, "loss": 0.5522, "step": 5117 }, { "epoch": 1.93, "learning_rate": 7.484628845053743e-08, "loss": 0.5401, "step": 5118 }, { "epoch": 1.93, "learning_rate": 7.404464507973608e-08, "loss": 0.5479, "step": 5119 }, { "epoch": 1.93, "learning_rate": 7.324730191412e-08, "loss": 0.5097, "step": 5120 }, { "epoch": 1.93, "learning_rate": 7.245425929912064e-08, "loss": 0.5115, "step": 5121 }, { "epoch": 1.93, "learning_rate": 7.166551757830543e-08, "loss": 0.5262, "step": 5122 }, { "epoch": 1.93, "learning_rate": 7.088107709337988e-08, "loss": 0.5408, "step": 5123 }, { "epoch": 1.93, "learning_rate": 7.010093818418772e-08, "loss": 0.555, "step": 5124 }, { "epoch": 1.93, "learning_rate": 6.932510118870417e-08, "loss": 0.5261, "step": 5125 }, { "epoch": 1.93, "learning_rate": 6.85535664430459e-08, "loss": 0.5553, "step": 5126 }, { "epoch": 1.93, "learning_rate": 6.778633428146331e-08, "loss": 0.5037, "step": 5127 }, { "epoch": 1.93, "learning_rate": 6.702340503634386e-08, "loss": 0.5495, "step": 5128 }, { "epoch": 1.93, "learning_rate": 6.62647790382076e-08, "loss": 0.5176, "step": 5129 }, { "epoch": 1.93, "learning_rate": 6.551045661571609e-08, "loss": 0.5756, "step": 5130 }, { "epoch": 1.93, "learning_rate": 6.476043809566012e-08, "loss": 0.5684, "step": 5131 }, { "epoch": 1.94, "learning_rate": 6.401472380297091e-08, "loss": 0.5227, "step": 5132 }, { "epoch": 1.94, "learning_rate": 6.327331406071224e-08, "loss": 0.5075, "step": 5133 }, { "epoch": 1.94, "learning_rate": 6.253620919008386e-08, "loss": 0.5349, "step": 5134 }, { "epoch": 1.94, "learning_rate": 6.180340951042141e-08, "loss": 0.5061, "step": 5135 }, { "epoch": 1.94, "learning_rate": 6.107491533919318e-08, "loss": 0.5942, "step": 5136 }, { "epoch": 1.94, "learning_rate": 6.035072699200451e-08, "loss": 0.5588, "step": 5137 }, { "epoch": 1.94, "learning_rate": 5.963084478259329e-08, "loss": 0.5437, "step": 5138 }, { "epoch": 1.94, "learning_rate": 5.891526902283451e-08, "loss": 0.4785, "step": 5139 }, { "epoch": 1.94, "learning_rate": 5.820400002273352e-08, "loss": 0.5576, "step": 5140 }, { "epoch": 1.94, "learning_rate": 5.749703809043494e-08, "loss": 0.5614, "step": 5141 }, { "epoch": 1.94, "learning_rate": 5.679438353221267e-08, "loss": 0.5237, "step": 5142 }, { "epoch": 1.94, "learning_rate": 5.6096036652477645e-08, "loss": 0.5632, "step": 5143 }, { "epoch": 1.94, "learning_rate": 5.5401997753771196e-08, "loss": 0.5624, "step": 5144 }, { "epoch": 1.94, "learning_rate": 5.471226713677502e-08, "loss": 0.5123, "step": 5145 }, { "epoch": 1.94, "learning_rate": 5.4026845100295654e-08, "loss": 0.5061, "step": 5146 }, { "epoch": 1.94, "learning_rate": 5.334573194128001e-08, "loss": 0.5226, "step": 5147 }, { "epoch": 1.94, "learning_rate": 5.266892795480538e-08, "loss": 0.5625, "step": 5148 }, { "epoch": 1.94, "learning_rate": 5.1996433434081675e-08, "loss": 0.5039, "step": 5149 }, { "epoch": 1.94, "learning_rate": 5.132824867045139e-08, "loss": 0.5241, "step": 5150 }, { "epoch": 1.94, "learning_rate": 5.066437395339407e-08, "loss": 0.5419, "step": 5151 }, { "epoch": 1.94, "learning_rate": 5.000480957051635e-08, "loss": 0.5589, "step": 5152 }, { "epoch": 1.94, "learning_rate": 4.934955580756184e-08, "loss": 0.567, "step": 5153 }, { "epoch": 1.94, "learning_rate": 4.869861294840461e-08, "loss": 0.533, "step": 5154 }, { "epoch": 1.94, "learning_rate": 4.805198127505129e-08, "loss": 0.5695, "step": 5155 }, { "epoch": 1.94, "learning_rate": 4.740966106764222e-08, "loss": 0.5264, "step": 5156 }, { "epoch": 1.94, "learning_rate": 4.677165260444705e-08, "loss": 0.4867, "step": 5157 }, { "epoch": 1.94, "learning_rate": 4.613795616186911e-08, "loss": 0.5714, "step": 5158 }, { "epoch": 1.95, "learning_rate": 4.5508572014445475e-08, "loss": 0.5607, "step": 5159 }, { "epoch": 1.95, "learning_rate": 4.4883500434841357e-08, "loss": 0.5653, "step": 5160 }, { "epoch": 1.95, "learning_rate": 4.426274169385569e-08, "loss": 0.524, "step": 5161 }, { "epoch": 1.95, "learning_rate": 4.364629606042003e-08, "loss": 0.5498, "step": 5162 }, { "epoch": 1.95, "learning_rate": 4.30341638015952e-08, "loss": 0.5129, "step": 5163 }, { "epoch": 1.95, "learning_rate": 4.24263451825746e-08, "loss": 0.5056, "step": 5164 }, { "epoch": 1.95, "learning_rate": 4.182284046668206e-08, "loss": 0.4681, "step": 5165 }, { "epoch": 1.95, "learning_rate": 4.1223649915372865e-08, "loss": 0.5486, "step": 5166 }, { "epoch": 1.95, "learning_rate": 4.062877378823382e-08, "loss": 0.5283, "step": 5167 }, { "epoch": 1.95, "learning_rate": 4.003821234298211e-08, "loss": 0.5193, "step": 5168 }, { "epoch": 1.95, "learning_rate": 3.945196583546529e-08, "loss": 0.5449, "step": 5169 }, { "epoch": 1.95, "learning_rate": 3.8870034519662423e-08, "loss": 0.534, "step": 5170 }, { "epoch": 1.95, "learning_rate": 3.829241864768185e-08, "loss": 0.5702, "step": 5171 }, { "epoch": 1.95, "learning_rate": 3.7719118469764505e-08, "loss": 0.5301, "step": 5172 }, { "epoch": 1.95, "learning_rate": 3.715013423427949e-08, "loss": 0.5578, "step": 5173 }, { "epoch": 1.95, "learning_rate": 3.6585466187726293e-08, "loss": 0.4962, "step": 5174 }, { "epoch": 1.95, "learning_rate": 3.602511457473479e-08, "loss": 0.5681, "step": 5175 }, { "epoch": 1.95, "learning_rate": 3.546907963806745e-08, "loss": 0.5513, "step": 5176 }, { "epoch": 1.95, "learning_rate": 3.49173616186127e-08, "loss": 0.5294, "step": 5177 }, { "epoch": 1.95, "learning_rate": 3.436996075539045e-08, "loss": 0.5512, "step": 5178 }, { "epoch": 1.95, "learning_rate": 3.3826877285551006e-08, "loss": 0.5488, "step": 5179 }, { "epoch": 1.95, "learning_rate": 3.3288111444372826e-08, "loss": 0.5327, "step": 5180 }, { "epoch": 1.95, "learning_rate": 3.275366346526476e-08, "loss": 0.518, "step": 5181 }, { "epoch": 1.95, "learning_rate": 3.222353357976382e-08, "loss": 0.552, "step": 5182 }, { "epoch": 1.95, "learning_rate": 3.1697722017539625e-08, "loss": 0.5208, "step": 5183 }, { "epoch": 1.95, "learning_rate": 3.117622900638661e-08, "loss": 0.5465, "step": 5184 }, { "epoch": 1.96, "learning_rate": 3.065905477223075e-08, "loss": 0.5611, "step": 5185 }, { "epoch": 1.96, "learning_rate": 3.014619953912834e-08, "loss": 0.5775, "step": 5186 }, { "epoch": 1.96, "learning_rate": 2.9637663529260562e-08, "loss": 0.5665, "step": 5187 }, { "epoch": 1.96, "learning_rate": 2.9133446962940072e-08, "loss": 0.5086, "step": 5188 }, { "epoch": 1.96, "learning_rate": 2.8633550058607684e-08, "loss": 0.5446, "step": 5189 }, { "epoch": 1.96, "learning_rate": 2.813797303283461e-08, "loss": 0.5406, "step": 5190 }, { "epoch": 1.96, "learning_rate": 2.7646716100316883e-08, "loss": 0.5475, "step": 5191 }, { "epoch": 1.96, "learning_rate": 2.7159779473880932e-08, "loss": 0.5467, "step": 5192 }, { "epoch": 1.96, "learning_rate": 2.667716336448356e-08, "loss": 0.5839, "step": 5193 }, { "epoch": 1.96, "learning_rate": 2.61988679812053e-08, "loss": 0.512, "step": 5194 }, { "epoch": 1.96, "learning_rate": 2.572489353125929e-08, "loss": 0.5608, "step": 5195 }, { "epoch": 1.96, "learning_rate": 2.525524021998349e-08, "loss": 0.5453, "step": 5196 }, { "epoch": 1.96, "learning_rate": 2.4789908250844042e-08, "loss": 0.5725, "step": 5197 }, { "epoch": 1.96, "learning_rate": 2.432889782543857e-08, "loss": 0.5749, "step": 5198 }, { "epoch": 1.96, "learning_rate": 2.3872209143488424e-08, "loss": 0.5393, "step": 5199 }, { "epoch": 1.96, "learning_rate": 2.3419842402844227e-08, "loss": 0.5179, "step": 5200 }, { "epoch": 1.96, "learning_rate": 2.2971797799483664e-08, "loss": 0.5441, "step": 5201 }, { "epoch": 1.96, "learning_rate": 2.252807552751257e-08, "loss": 0.5144, "step": 5202 }, { "epoch": 1.96, "learning_rate": 2.208867577916385e-08, "loss": 0.555, "step": 5203 }, { "epoch": 1.96, "learning_rate": 2.1653598744798553e-08, "loss": 0.4931, "step": 5204 }, { "epoch": 1.96, "learning_rate": 2.122284461290369e-08, "loss": 0.5757, "step": 5205 }, { "epoch": 1.96, "learning_rate": 2.0796413570094432e-08, "loss": 0.535, "step": 5206 }, { "epoch": 1.96, "learning_rate": 2.0374305801114102e-08, "loss": 0.5371, "step": 5207 }, { "epoch": 1.96, "learning_rate": 1.9956521488829762e-08, "loss": 0.5241, "step": 5208 }, { "epoch": 1.96, "learning_rate": 1.9543060814237735e-08, "loss": 0.5603, "step": 5209 }, { "epoch": 1.96, "learning_rate": 1.9133923956461408e-08, "loss": 0.5873, "step": 5210 }, { "epoch": 1.96, "learning_rate": 1.872911109275011e-08, "loss": 0.5035, "step": 5211 }, { "epoch": 1.97, "learning_rate": 1.8328622398481323e-08, "loss": 0.5484, "step": 5212 }, { "epoch": 1.97, "learning_rate": 1.7932458047156265e-08, "loss": 0.5534, "step": 5213 }, { "epoch": 1.97, "learning_rate": 1.7540618210406536e-08, "loss": 0.5391, "step": 5214 }, { "epoch": 1.97, "learning_rate": 1.7153103057987453e-08, "loss": 0.5711, "step": 5215 }, { "epoch": 1.97, "learning_rate": 1.6769912757781392e-08, "loss": 0.538, "step": 5216 }, { "epoch": 1.97, "learning_rate": 1.6391047475796673e-08, "loss": 0.5388, "step": 5217 }, { "epoch": 1.97, "learning_rate": 1.6016507376169776e-08, "loss": 0.5413, "step": 5218 }, { "epoch": 1.97, "learning_rate": 1.5646292621159797e-08, "loss": 0.5602, "step": 5219 }, { "epoch": 1.97, "learning_rate": 1.5280403371157325e-08, "loss": 0.5914, "step": 5220 }, { "epoch": 1.97, "learning_rate": 1.4918839784674456e-08, "loss": 0.5547, "step": 5221 }, { "epoch": 1.97, "learning_rate": 1.4561602018350329e-08, "loss": 0.5235, "step": 5222 }, { "epoch": 1.97, "learning_rate": 1.420869022695115e-08, "loss": 0.4846, "step": 5223 }, { "epoch": 1.97, "learning_rate": 1.386010456336795e-08, "loss": 0.5291, "step": 5224 }, { "epoch": 1.97, "learning_rate": 1.3515845178618814e-08, "loss": 0.5869, "step": 5225 }, { "epoch": 1.97, "learning_rate": 1.3175912221845556e-08, "loss": 0.4739, "step": 5226 }, { "epoch": 1.97, "learning_rate": 1.2840305840317036e-08, "loss": 0.5592, "step": 5227 }, { "epoch": 1.97, "learning_rate": 1.2509026179426953e-08, "loss": 0.5526, "step": 5228 }, { "epoch": 1.97, "learning_rate": 1.2182073382696058e-08, "loss": 0.496, "step": 5229 }, { "epoch": 1.97, "learning_rate": 1.1859447591769934e-08, "loss": 0.5236, "step": 5230 }, { "epoch": 1.97, "learning_rate": 1.154114894641789e-08, "loss": 0.5534, "step": 5231 }, { "epoch": 1.97, "learning_rate": 1.1227177584536286e-08, "loss": 0.5143, "step": 5232 }, { "epoch": 1.97, "learning_rate": 1.0917533642146317e-08, "loss": 0.4902, "step": 5233 }, { "epoch": 1.97, "learning_rate": 1.0612217253395118e-08, "loss": 0.5757, "step": 5234 }, { "epoch": 1.97, "learning_rate": 1.0311228550554664e-08, "loss": 0.5407, "step": 5235 }, { "epoch": 1.97, "learning_rate": 1.0014567664020646e-08, "loss": 0.4653, "step": 5236 }, { "epoch": 1.97, "learning_rate": 9.722234722315816e-09, "loss": 0.4977, "step": 5237 }, { "epoch": 1.98, "learning_rate": 9.43422985208664e-09, "loss": 0.5647, "step": 5238 }, { "epoch": 1.98, "learning_rate": 9.150553178104427e-09, "loss": 0.5243, "step": 5239 }, { "epoch": 1.98, "learning_rate": 8.871204823267531e-09, "loss": 0.5441, "step": 5240 }, { "epoch": 1.98, "learning_rate": 8.596184908596927e-09, "loss": 0.5455, "step": 5241 }, { "epoch": 1.98, "learning_rate": 8.325493553238418e-09, "loss": 0.5367, "step": 5242 }, { "epoch": 1.98, "learning_rate": 8.05913087446375e-09, "loss": 0.5568, "step": 5243 }, { "epoch": 1.98, "learning_rate": 7.797096987668396e-09, "loss": 0.5512, "step": 5244 }, { "epoch": 1.98, "learning_rate": 7.539392006373769e-09, "loss": 0.5111, "step": 5245 }, { "epoch": 1.98, "learning_rate": 7.2860160422227875e-09, "loss": 0.5321, "step": 5246 }, { "epoch": 1.98, "learning_rate": 7.036969204987642e-09, "loss": 0.5263, "step": 5247 }, { "epoch": 1.98, "learning_rate": 6.792251602562028e-09, "loss": 0.5659, "step": 5248 }, { "epoch": 1.98, "learning_rate": 6.55186334096336e-09, "loss": 0.5165, "step": 5249 }, { "epoch": 1.98, "learning_rate": 6.315804524335001e-09, "loss": 0.543, "step": 5250 }, { "epoch": 1.98, "learning_rate": 6.084075254945143e-09, "loss": 0.5055, "step": 5251 }, { "epoch": 1.98, "learning_rate": 5.856675633184594e-09, "loss": 0.505, "step": 5252 }, { "epoch": 1.98, "learning_rate": 5.633605757570104e-09, "loss": 0.5386, "step": 5253 }, { "epoch": 1.98, "learning_rate": 5.414865724739926e-09, "loss": 0.5232, "step": 5254 }, { "epoch": 1.98, "learning_rate": 5.200455629461587e-09, "loss": 0.4777, "step": 5255 }, { "epoch": 1.98, "learning_rate": 4.990375564620786e-09, "loss": 0.5104, "step": 5256 }, { "epoch": 1.98, "learning_rate": 4.784625621232497e-09, "loss": 0.538, "step": 5257 }, { "epoch": 1.98, "learning_rate": 4.583205888430975e-09, "loss": 0.5629, "step": 5258 }, { "epoch": 1.98, "learning_rate": 4.38611645347864e-09, "loss": 0.5275, "step": 5259 }, { "epoch": 1.98, "learning_rate": 4.193357401760523e-09, "loss": 0.5821, "step": 5260 }, { "epoch": 1.98, "learning_rate": 4.00492881678427e-09, "loss": 0.5281, "step": 5261 }, { "epoch": 1.98, "learning_rate": 3.820830780182361e-09, "loss": 0.5007, "step": 5262 }, { "epoch": 1.98, "learning_rate": 3.6410633717109955e-09, "loss": 0.5044, "step": 5263 }, { "epoch": 1.98, "learning_rate": 3.46562666925232e-09, "loss": 0.4958, "step": 5264 }, { "epoch": 1.99, "learning_rate": 3.2945207488099817e-09, "loss": 0.445, "step": 5265 }, { "epoch": 1.99, "learning_rate": 3.1277456845102414e-09, "loss": 0.5822, "step": 5266 }, { "epoch": 1.99, "learning_rate": 2.9653015486064143e-09, "loss": 0.5228, "step": 5267 }, { "epoch": 1.99, "learning_rate": 2.807188411473316e-09, "loss": 0.5155, "step": 5268 }, { "epoch": 1.99, "learning_rate": 2.653406341609488e-09, "loss": 0.5445, "step": 5269 }, { "epoch": 1.99, "learning_rate": 2.5039554056383032e-09, "loss": 0.5563, "step": 5270 }, { "epoch": 1.99, "learning_rate": 2.358835668306858e-09, "loss": 0.5298, "step": 5271 }, { "epoch": 1.99, "learning_rate": 2.2180471924848625e-09, "loss": 0.5374, "step": 5272 }, { "epoch": 1.99, "learning_rate": 2.0815900391646384e-09, "loss": 0.5096, "step": 5273 }, { "epoch": 1.99, "learning_rate": 1.9494642674633413e-09, "loss": 0.5361, "step": 5274 }, { "epoch": 1.99, "learning_rate": 1.8216699346229605e-09, "loss": 0.5911, "step": 5275 }, { "epoch": 1.99, "learning_rate": 1.6982070960069874e-09, "loss": 0.5614, "step": 5276 }, { "epoch": 1.99, "learning_rate": 1.579075805103747e-09, "loss": 0.577, "step": 5277 }, { "epoch": 1.99, "learning_rate": 1.4642761135230665e-09, "loss": 0.5818, "step": 5278 }, { "epoch": 1.99, "learning_rate": 1.353808070999607e-09, "loss": 0.5143, "step": 5279 }, { "epoch": 1.99, "learning_rate": 1.2476717253917526e-09, "loss": 0.6192, "step": 5280 }, { "epoch": 1.99, "learning_rate": 1.1458671226804995e-09, "loss": 0.5596, "step": 5281 }, { "epoch": 1.99, "learning_rate": 1.0483943069716784e-09, "loss": 0.5504, "step": 5282 }, { "epoch": 1.99, "learning_rate": 9.552533204904014e-10, "loss": 0.5831, "step": 5283 }, { "epoch": 1.99, "learning_rate": 8.664442035910548e-10, "loss": 0.5545, "step": 5284 }, { "epoch": 1.99, "learning_rate": 7.819669947461972e-10, "loss": 0.5655, "step": 5285 }, { "epoch": 1.99, "learning_rate": 7.018217305554409e-10, "loss": 0.5552, "step": 5286 }, { "epoch": 1.99, "learning_rate": 6.260084457387905e-10, "loss": 0.5364, "step": 5287 }, { "epoch": 1.99, "learning_rate": 5.545271731399737e-10, "loss": 0.4948, "step": 5288 }, { "epoch": 1.99, "learning_rate": 4.873779437286618e-10, "loss": 0.4834, "step": 5289 }, { "epoch": 1.99, "learning_rate": 4.245607865949186e-10, "loss": 0.4929, "step": 5290 }, { "epoch": 2.0, "learning_rate": 3.6607572895253074e-10, "loss": 0.5454, "step": 5291 }, { "epoch": 2.0, "learning_rate": 3.119227961390081e-10, "loss": 0.5648, "step": 5292 }, { "epoch": 2.0, "learning_rate": 2.621020116155837e-10, "loss": 0.5336, "step": 5293 }, { "epoch": 2.0, "learning_rate": 2.1661339696610328e-10, "loss": 0.4958, "step": 5294 }, { "epoch": 2.0, "learning_rate": 1.7545697189591536e-10, "loss": 0.5918, "step": 5295 }, { "epoch": 2.0, "learning_rate": 1.3863275423742218e-10, "loss": 0.5486, "step": 5296 }, { "epoch": 2.0, "learning_rate": 1.061407599423081e-10, "loss": 0.5329, "step": 5297 }, { "epoch": 2.0, "learning_rate": 7.798100308709089e-11, "loss": 0.5129, "step": 5298 }, { "epoch": 2.0, "learning_rate": 5.4153495872011356e-11, "loss": 0.5385, "step": 5299 }, { "epoch": 2.0, "learning_rate": 3.4658248618812953e-11, "loss": 0.5758, "step": 5300 }, { "epoch": 2.0, "learning_rate": 1.949526977518268e-11, "loss": 0.5805, "step": 5301 }, { "epoch": 2.0, "learning_rate": 8.664565908089728e-12, "loss": 0.5086, "step": 5302 }, { "epoch": 2.0, "learning_rate": 2.166141711557046e-12, "loss": 0.528, "step": 5303 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 0.524, "step": 5304 }, { "epoch": 2.0, "step": 5304, "total_flos": 5.289027402477863e+18, "train_loss": 0.6110544146332176, "train_runtime": 110621.8051, "train_samples_per_second": 3.068, "train_steps_per_second": 0.048 } ], "logging_steps": 1.0, "max_steps": 5304, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100000.0, "total_flos": 5.289027402477863e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }