{ "best_metric": 0.99837772836593, "best_model_checkpoint": "FFPP-Raw_1FPS_faces-expand-0-aligned\\checkpoint-26172", "epoch": 19.99274047186933, "eval_steps": 500, "global_step": 27540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 8.668222427368164, "learning_rate": 1.815541031227306e-07, "loss": 0.8207, "step": 10 }, { "epoch": 0.01, "grad_norm": 9.68768310546875, "learning_rate": 3.631082062454612e-07, "loss": 0.8143, "step": 20 }, { "epoch": 0.02, "grad_norm": 7.637276649475098, "learning_rate": 5.446623093681918e-07, "loss": 0.7725, "step": 30 }, { "epoch": 0.03, "grad_norm": 6.712704181671143, "learning_rate": 7.262164124909224e-07, "loss": 0.7328, "step": 40 }, { "epoch": 0.04, "grad_norm": 8.824642181396484, "learning_rate": 9.077705156136529e-07, "loss": 0.6809, "step": 50 }, { "epoch": 0.04, "grad_norm": 5.087621688842773, "learning_rate": 1.0893246187363836e-06, "loss": 0.6397, "step": 60 }, { "epoch": 0.05, "grad_norm": 4.494392395019531, "learning_rate": 1.2708787218591142e-06, "loss": 0.5765, "step": 70 }, { "epoch": 0.06, "grad_norm": 3.251326084136963, "learning_rate": 1.4524328249818447e-06, "loss": 0.5615, "step": 80 }, { "epoch": 0.07, "grad_norm": 2.6707677841186523, "learning_rate": 1.6339869281045753e-06, "loss": 0.5184, "step": 90 }, { "epoch": 0.07, "grad_norm": 3.2091023921966553, "learning_rate": 1.8155410312273058e-06, "loss": 0.5496, "step": 100 }, { "epoch": 0.08, "grad_norm": 2.7207913398742676, "learning_rate": 1.9970951343500366e-06, "loss": 0.531, "step": 110 }, { "epoch": 0.09, "grad_norm": 2.7199337482452393, "learning_rate": 2.178649237472767e-06, "loss": 0.5033, "step": 120 }, { "epoch": 0.09, "grad_norm": 4.677346229553223, "learning_rate": 2.3602033405954977e-06, "loss": 0.5303, "step": 130 }, { "epoch": 0.1, "grad_norm": 2.0803098678588867, "learning_rate": 2.5417574437182283e-06, "loss": 0.4915, "step": 140 }, { "epoch": 0.11, "grad_norm": 3.441002130508423, "learning_rate": 2.723311546840959e-06, "loss": 0.5115, "step": 150 }, { "epoch": 0.12, "grad_norm": 2.644590377807617, "learning_rate": 2.9048656499636894e-06, "loss": 0.5185, "step": 160 }, { "epoch": 0.12, "grad_norm": 3.385890245437622, "learning_rate": 3.0864197530864196e-06, "loss": 0.5344, "step": 170 }, { "epoch": 0.13, "grad_norm": 2.4812233448028564, "learning_rate": 3.2679738562091506e-06, "loss": 0.5047, "step": 180 }, { "epoch": 0.14, "grad_norm": 4.330173969268799, "learning_rate": 3.449527959331881e-06, "loss": 0.4803, "step": 190 }, { "epoch": 0.15, "grad_norm": 3.671731948852539, "learning_rate": 3.6310820624546117e-06, "loss": 0.4431, "step": 200 }, { "epoch": 0.15, "grad_norm": 3.001225233078003, "learning_rate": 3.8126361655773422e-06, "loss": 0.4784, "step": 210 }, { "epoch": 0.16, "grad_norm": 6.147005558013916, "learning_rate": 3.994190268700073e-06, "loss": 0.4897, "step": 220 }, { "epoch": 0.17, "grad_norm": 3.842711925506592, "learning_rate": 4.175744371822803e-06, "loss": 0.4645, "step": 230 }, { "epoch": 0.17, "grad_norm": 5.135747909545898, "learning_rate": 4.357298474945534e-06, "loss": 0.4478, "step": 240 }, { "epoch": 0.18, "grad_norm": 5.1160359382629395, "learning_rate": 4.5388525780682645e-06, "loss": 0.4059, "step": 250 }, { "epoch": 0.19, "grad_norm": 16.816068649291992, "learning_rate": 4.7204066811909955e-06, "loss": 0.3976, "step": 260 }, { "epoch": 0.2, "grad_norm": 7.460231304168701, "learning_rate": 4.901960784313726e-06, "loss": 0.3889, "step": 270 }, { "epoch": 0.2, "grad_norm": 6.641134738922119, "learning_rate": 5.083514887436457e-06, "loss": 0.3661, "step": 280 }, { "epoch": 0.21, "grad_norm": 26.321130752563477, "learning_rate": 5.265068990559187e-06, "loss": 0.387, "step": 290 }, { "epoch": 0.22, "grad_norm": 19.10285186767578, "learning_rate": 5.446623093681918e-06, "loss": 0.3531, "step": 300 }, { "epoch": 0.23, "grad_norm": 10.938743591308594, "learning_rate": 5.628177196804648e-06, "loss": 0.3221, "step": 310 }, { "epoch": 0.23, "grad_norm": 18.0218563079834, "learning_rate": 5.809731299927379e-06, "loss": 0.36, "step": 320 }, { "epoch": 0.24, "grad_norm": 18.682737350463867, "learning_rate": 5.991285403050109e-06, "loss": 0.3285, "step": 330 }, { "epoch": 0.25, "grad_norm": 15.782907485961914, "learning_rate": 6.172839506172839e-06, "loss": 0.2897, "step": 340 }, { "epoch": 0.25, "grad_norm": 8.58176326751709, "learning_rate": 6.354393609295569e-06, "loss": 0.2722, "step": 350 }, { "epoch": 0.26, "grad_norm": 30.12000846862793, "learning_rate": 6.535947712418301e-06, "loss": 0.2498, "step": 360 }, { "epoch": 0.27, "grad_norm": 18.24226188659668, "learning_rate": 6.717501815541031e-06, "loss": 0.2572, "step": 370 }, { "epoch": 0.28, "grad_norm": 8.81354808807373, "learning_rate": 6.899055918663762e-06, "loss": 0.2644, "step": 380 }, { "epoch": 0.28, "grad_norm": 19.555673599243164, "learning_rate": 7.080610021786492e-06, "loss": 0.2591, "step": 390 }, { "epoch": 0.29, "grad_norm": 28.524112701416016, "learning_rate": 7.262164124909223e-06, "loss": 0.2599, "step": 400 }, { "epoch": 0.3, "grad_norm": 23.34917449951172, "learning_rate": 7.443718228031954e-06, "loss": 0.2286, "step": 410 }, { "epoch": 0.3, "grad_norm": 14.82242202758789, "learning_rate": 7.6252723311546845e-06, "loss": 0.1988, "step": 420 }, { "epoch": 0.31, "grad_norm": 16.295961380004883, "learning_rate": 7.806826434277415e-06, "loss": 0.2202, "step": 430 }, { "epoch": 0.32, "grad_norm": 10.531784057617188, "learning_rate": 7.988380537400146e-06, "loss": 0.2101, "step": 440 }, { "epoch": 0.33, "grad_norm": 35.081485748291016, "learning_rate": 8.169934640522877e-06, "loss": 0.2134, "step": 450 }, { "epoch": 0.33, "grad_norm": 19.544342041015625, "learning_rate": 8.351488743645607e-06, "loss": 0.2354, "step": 460 }, { "epoch": 0.34, "grad_norm": 18.607032775878906, "learning_rate": 8.533042846768337e-06, "loss": 0.2113, "step": 470 }, { "epoch": 0.35, "grad_norm": 27.145376205444336, "learning_rate": 8.714596949891069e-06, "loss": 0.1963, "step": 480 }, { "epoch": 0.36, "grad_norm": 54.49102020263672, "learning_rate": 8.896151053013799e-06, "loss": 0.2754, "step": 490 }, { "epoch": 0.36, "grad_norm": 15.611857414245605, "learning_rate": 9.077705156136529e-06, "loss": 0.1953, "step": 500 }, { "epoch": 0.37, "grad_norm": 10.222444534301758, "learning_rate": 9.259259259259259e-06, "loss": 0.1828, "step": 510 }, { "epoch": 0.38, "grad_norm": 30.64798355102539, "learning_rate": 9.440813362381991e-06, "loss": 0.1767, "step": 520 }, { "epoch": 0.38, "grad_norm": 29.964618682861328, "learning_rate": 9.622367465504721e-06, "loss": 0.2311, "step": 530 }, { "epoch": 0.39, "grad_norm": 16.04256820678711, "learning_rate": 9.803921568627451e-06, "loss": 0.207, "step": 540 }, { "epoch": 0.4, "grad_norm": 12.842723846435547, "learning_rate": 9.985475671750181e-06, "loss": 0.2057, "step": 550 }, { "epoch": 0.41, "grad_norm": 20.018783569335938, "learning_rate": 1.0167029774872913e-05, "loss": 0.1949, "step": 560 }, { "epoch": 0.41, "grad_norm": 12.877035140991211, "learning_rate": 1.0348583877995643e-05, "loss": 0.1891, "step": 570 }, { "epoch": 0.42, "grad_norm": 40.139564514160156, "learning_rate": 1.0530137981118374e-05, "loss": 0.1735, "step": 580 }, { "epoch": 0.43, "grad_norm": 27.097129821777344, "learning_rate": 1.0711692084241104e-05, "loss": 0.1776, "step": 590 }, { "epoch": 0.44, "grad_norm": 29.121681213378906, "learning_rate": 1.0893246187363835e-05, "loss": 0.1834, "step": 600 }, { "epoch": 0.44, "grad_norm": 12.37022876739502, "learning_rate": 1.1074800290486566e-05, "loss": 0.197, "step": 610 }, { "epoch": 0.45, "grad_norm": 12.485506057739258, "learning_rate": 1.1256354393609296e-05, "loss": 0.1848, "step": 620 }, { "epoch": 0.46, "grad_norm": 19.3685359954834, "learning_rate": 1.1437908496732026e-05, "loss": 0.1557, "step": 630 }, { "epoch": 0.46, "grad_norm": 7.632847309112549, "learning_rate": 1.1619462599854758e-05, "loss": 0.1862, "step": 640 }, { "epoch": 0.47, "grad_norm": 9.411360740661621, "learning_rate": 1.1801016702977488e-05, "loss": 0.1519, "step": 650 }, { "epoch": 0.48, "grad_norm": 14.69303035736084, "learning_rate": 1.1982570806100218e-05, "loss": 0.1724, "step": 660 }, { "epoch": 0.49, "grad_norm": 21.442214965820312, "learning_rate": 1.2164124909222948e-05, "loss": 0.178, "step": 670 }, { "epoch": 0.49, "grad_norm": 26.165117263793945, "learning_rate": 1.2345679012345678e-05, "loss": 0.1562, "step": 680 }, { "epoch": 0.5, "grad_norm": 54.52130889892578, "learning_rate": 1.2527233115468408e-05, "loss": 0.1784, "step": 690 }, { "epoch": 0.51, "grad_norm": 13.07323932647705, "learning_rate": 1.2708787218591139e-05, "loss": 0.1642, "step": 700 }, { "epoch": 0.52, "grad_norm": 10.937763214111328, "learning_rate": 1.2890341321713872e-05, "loss": 0.1494, "step": 710 }, { "epoch": 0.52, "grad_norm": 25.107704162597656, "learning_rate": 1.3071895424836602e-05, "loss": 0.1698, "step": 720 }, { "epoch": 0.53, "grad_norm": 16.78830909729004, "learning_rate": 1.3253449527959332e-05, "loss": 0.2017, "step": 730 }, { "epoch": 0.54, "grad_norm": 22.479267120361328, "learning_rate": 1.3435003631082063e-05, "loss": 0.2282, "step": 740 }, { "epoch": 0.54, "grad_norm": 12.196043014526367, "learning_rate": 1.3616557734204793e-05, "loss": 0.1682, "step": 750 }, { "epoch": 0.55, "grad_norm": 6.735928535461426, "learning_rate": 1.3798111837327524e-05, "loss": 0.1308, "step": 760 }, { "epoch": 0.56, "grad_norm": 21.126880645751953, "learning_rate": 1.3979665940450255e-05, "loss": 0.194, "step": 770 }, { "epoch": 0.57, "grad_norm": 19.850435256958008, "learning_rate": 1.4161220043572985e-05, "loss": 0.1497, "step": 780 }, { "epoch": 0.57, "grad_norm": 8.330199241638184, "learning_rate": 1.4342774146695717e-05, "loss": 0.1469, "step": 790 }, { "epoch": 0.58, "grad_norm": 25.57647705078125, "learning_rate": 1.4524328249818447e-05, "loss": 0.1964, "step": 800 }, { "epoch": 0.59, "grad_norm": 21.645137786865234, "learning_rate": 1.4705882352941177e-05, "loss": 0.1665, "step": 810 }, { "epoch": 0.6, "grad_norm": 16.67516326904297, "learning_rate": 1.4887436456063909e-05, "loss": 0.2308, "step": 820 }, { "epoch": 0.6, "grad_norm": 15.145057678222656, "learning_rate": 1.5068990559186639e-05, "loss": 0.148, "step": 830 }, { "epoch": 0.61, "grad_norm": 25.186765670776367, "learning_rate": 1.5250544662309369e-05, "loss": 0.1624, "step": 840 }, { "epoch": 0.62, "grad_norm": 10.655858993530273, "learning_rate": 1.54320987654321e-05, "loss": 0.1919, "step": 850 }, { "epoch": 0.62, "grad_norm": 11.99652099609375, "learning_rate": 1.561365286855483e-05, "loss": 0.1785, "step": 860 }, { "epoch": 0.63, "grad_norm": 24.033994674682617, "learning_rate": 1.5795206971677563e-05, "loss": 0.1879, "step": 870 }, { "epoch": 0.64, "grad_norm": 33.82864761352539, "learning_rate": 1.5976761074800293e-05, "loss": 0.1325, "step": 880 }, { "epoch": 0.65, "grad_norm": 23.036666870117188, "learning_rate": 1.6158315177923023e-05, "loss": 0.1517, "step": 890 }, { "epoch": 0.65, "grad_norm": 9.412284851074219, "learning_rate": 1.6339869281045753e-05, "loss": 0.1152, "step": 900 }, { "epoch": 0.66, "grad_norm": 10.256453514099121, "learning_rate": 1.6521423384168483e-05, "loss": 0.0987, "step": 910 }, { "epoch": 0.67, "grad_norm": 23.826547622680664, "learning_rate": 1.6702977487291213e-05, "loss": 0.117, "step": 920 }, { "epoch": 0.68, "grad_norm": 6.3597517013549805, "learning_rate": 1.6884531590413944e-05, "loss": 0.1716, "step": 930 }, { "epoch": 0.68, "grad_norm": 7.3157639503479, "learning_rate": 1.7066085693536674e-05, "loss": 0.1555, "step": 940 }, { "epoch": 0.69, "grad_norm": 20.580583572387695, "learning_rate": 1.7247639796659407e-05, "loss": 0.1315, "step": 950 }, { "epoch": 0.7, "grad_norm": 45.27377700805664, "learning_rate": 1.7429193899782137e-05, "loss": 0.1621, "step": 960 }, { "epoch": 0.7, "grad_norm": 8.26957893371582, "learning_rate": 1.7610748002904868e-05, "loss": 0.1007, "step": 970 }, { "epoch": 0.71, "grad_norm": 15.427580833435059, "learning_rate": 1.7792302106027598e-05, "loss": 0.1367, "step": 980 }, { "epoch": 0.72, "grad_norm": 17.598981857299805, "learning_rate": 1.7973856209150328e-05, "loss": 0.1406, "step": 990 }, { "epoch": 0.73, "grad_norm": 28.66621208190918, "learning_rate": 1.8155410312273058e-05, "loss": 0.1633, "step": 1000 }, { "epoch": 0.73, "grad_norm": 17.14661979675293, "learning_rate": 1.8336964415395788e-05, "loss": 0.1162, "step": 1010 }, { "epoch": 0.74, "grad_norm": 13.592978477478027, "learning_rate": 1.8518518518518518e-05, "loss": 0.2155, "step": 1020 }, { "epoch": 0.75, "grad_norm": 36.90546798706055, "learning_rate": 1.870007262164125e-05, "loss": 0.1604, "step": 1030 }, { "epoch": 0.75, "grad_norm": 11.94582748413086, "learning_rate": 1.8881626724763982e-05, "loss": 0.1997, "step": 1040 }, { "epoch": 0.76, "grad_norm": 18.860889434814453, "learning_rate": 1.9063180827886712e-05, "loss": 0.1189, "step": 1050 }, { "epoch": 0.77, "grad_norm": 13.953115463256836, "learning_rate": 1.9244734931009442e-05, "loss": 0.1091, "step": 1060 }, { "epoch": 0.78, "grad_norm": 8.162593841552734, "learning_rate": 1.9426289034132172e-05, "loss": 0.1032, "step": 1070 }, { "epoch": 0.78, "grad_norm": 13.563828468322754, "learning_rate": 1.9607843137254903e-05, "loss": 0.1666, "step": 1080 }, { "epoch": 0.79, "grad_norm": 9.292993545532227, "learning_rate": 1.9789397240377633e-05, "loss": 0.1254, "step": 1090 }, { "epoch": 0.8, "grad_norm": 12.907480239868164, "learning_rate": 1.9970951343500363e-05, "loss": 0.1551, "step": 1100 }, { "epoch": 0.81, "grad_norm": 9.786681175231934, "learning_rate": 2.0152505446623093e-05, "loss": 0.1087, "step": 1110 }, { "epoch": 0.81, "grad_norm": 11.040635108947754, "learning_rate": 2.0334059549745826e-05, "loss": 0.1282, "step": 1120 }, { "epoch": 0.82, "grad_norm": 12.948040008544922, "learning_rate": 2.0515613652868557e-05, "loss": 0.1329, "step": 1130 }, { "epoch": 0.83, "grad_norm": 36.504207611083984, "learning_rate": 2.0697167755991287e-05, "loss": 0.1145, "step": 1140 }, { "epoch": 0.83, "grad_norm": 9.630166053771973, "learning_rate": 2.0878721859114017e-05, "loss": 0.15, "step": 1150 }, { "epoch": 0.84, "grad_norm": 22.309511184692383, "learning_rate": 2.1060275962236747e-05, "loss": 0.2228, "step": 1160 }, { "epoch": 0.85, "grad_norm": 22.357301712036133, "learning_rate": 2.1241830065359477e-05, "loss": 0.149, "step": 1170 }, { "epoch": 0.86, "grad_norm": 7.280608177185059, "learning_rate": 2.1423384168482207e-05, "loss": 0.1373, "step": 1180 }, { "epoch": 0.86, "grad_norm": 6.142936706542969, "learning_rate": 2.1604938271604937e-05, "loss": 0.118, "step": 1190 }, { "epoch": 0.87, "grad_norm": 5.754367828369141, "learning_rate": 2.178649237472767e-05, "loss": 0.1213, "step": 1200 }, { "epoch": 0.88, "grad_norm": 10.323685646057129, "learning_rate": 2.19680464778504e-05, "loss": 0.1203, "step": 1210 }, { "epoch": 0.89, "grad_norm": 31.037113189697266, "learning_rate": 2.214960058097313e-05, "loss": 0.1149, "step": 1220 }, { "epoch": 0.89, "grad_norm": 17.392948150634766, "learning_rate": 2.233115468409586e-05, "loss": 0.1326, "step": 1230 }, { "epoch": 0.9, "grad_norm": 15.242566108703613, "learning_rate": 2.251270878721859e-05, "loss": 0.1098, "step": 1240 }, { "epoch": 0.91, "grad_norm": 14.354398727416992, "learning_rate": 2.269426289034132e-05, "loss": 0.1288, "step": 1250 }, { "epoch": 0.91, "grad_norm": 8.135055541992188, "learning_rate": 2.2875816993464052e-05, "loss": 0.1553, "step": 1260 }, { "epoch": 0.92, "grad_norm": 11.832168579101562, "learning_rate": 2.3057371096586782e-05, "loss": 0.1088, "step": 1270 }, { "epoch": 0.93, "grad_norm": 15.753230094909668, "learning_rate": 2.3238925199709515e-05, "loss": 0.0886, "step": 1280 }, { "epoch": 0.94, "grad_norm": 11.60594654083252, "learning_rate": 2.3420479302832246e-05, "loss": 0.1142, "step": 1290 }, { "epoch": 0.94, "grad_norm": 9.862659454345703, "learning_rate": 2.3602033405954976e-05, "loss": 0.0894, "step": 1300 }, { "epoch": 0.95, "grad_norm": 21.492176055908203, "learning_rate": 2.3783587509077706e-05, "loss": 0.1154, "step": 1310 }, { "epoch": 0.96, "grad_norm": 14.04377555847168, "learning_rate": 2.3965141612200436e-05, "loss": 0.1221, "step": 1320 }, { "epoch": 0.97, "grad_norm": 7.398568153381348, "learning_rate": 2.4146695715323166e-05, "loss": 0.1275, "step": 1330 }, { "epoch": 0.97, "grad_norm": 10.66848087310791, "learning_rate": 2.4328249818445896e-05, "loss": 0.0981, "step": 1340 }, { "epoch": 0.98, "grad_norm": 7.866222858428955, "learning_rate": 2.4509803921568626e-05, "loss": 0.1048, "step": 1350 }, { "epoch": 0.99, "grad_norm": 10.718284606933594, "learning_rate": 2.4691358024691357e-05, "loss": 0.1096, "step": 1360 }, { "epoch": 0.99, "grad_norm": 38.74601745605469, "learning_rate": 2.487291212781409e-05, "loss": 0.0983, "step": 1370 }, { "epoch": 1.0, "eval_accuracy": 0.9742818895493942, "eval_f1": 0.9425071644137861, "eval_loss": 0.0679459497332573, "eval_precision": 0.916498150431566, "eval_recall": 0.9700354980162873, "eval_roc_auc": 0.9960748609284922, "eval_runtime": 385.505, "eval_samples_per_second": 228.656, "eval_steps_per_second": 14.293, "step": 1377 }, { "epoch": 1.0, "grad_norm": 26.3389892578125, "learning_rate": 2.5054466230936817e-05, "loss": 0.1328, "step": 1380 }, { "epoch": 1.01, "grad_norm": 7.549662113189697, "learning_rate": 2.523602033405955e-05, "loss": 0.1025, "step": 1390 }, { "epoch": 1.02, "grad_norm": 15.942997932434082, "learning_rate": 2.5417574437182277e-05, "loss": 0.0898, "step": 1400 }, { "epoch": 1.02, "grad_norm": 29.061594009399414, "learning_rate": 2.559912854030501e-05, "loss": 0.1226, "step": 1410 }, { "epoch": 1.03, "grad_norm": 5.289678573608398, "learning_rate": 2.5780682643427744e-05, "loss": 0.0947, "step": 1420 }, { "epoch": 1.04, "grad_norm": 46.73320388793945, "learning_rate": 2.596223674655047e-05, "loss": 0.1063, "step": 1430 }, { "epoch": 1.05, "grad_norm": 36.990875244140625, "learning_rate": 2.6143790849673204e-05, "loss": 0.0916, "step": 1440 }, { "epoch": 1.05, "grad_norm": 12.835508346557617, "learning_rate": 2.632534495279593e-05, "loss": 0.1137, "step": 1450 }, { "epoch": 1.06, "grad_norm": 4.24277400970459, "learning_rate": 2.6506899055918665e-05, "loss": 0.1178, "step": 1460 }, { "epoch": 1.07, "grad_norm": 24.840017318725586, "learning_rate": 2.6688453159041395e-05, "loss": 0.0862, "step": 1470 }, { "epoch": 1.07, "grad_norm": 7.686561107635498, "learning_rate": 2.6870007262164125e-05, "loss": 0.0664, "step": 1480 }, { "epoch": 1.08, "grad_norm": 9.543157577514648, "learning_rate": 2.705156136528686e-05, "loss": 0.1024, "step": 1490 }, { "epoch": 1.09, "grad_norm": 17.78160858154297, "learning_rate": 2.7233115468409585e-05, "loss": 0.078, "step": 1500 }, { "epoch": 1.1, "grad_norm": 43.5964241027832, "learning_rate": 2.741466957153232e-05, "loss": 0.1601, "step": 1510 }, { "epoch": 1.1, "grad_norm": 9.158828735351562, "learning_rate": 2.759622367465505e-05, "loss": 0.1064, "step": 1520 }, { "epoch": 1.11, "grad_norm": 10.158084869384766, "learning_rate": 2.777777777777778e-05, "loss": 0.0888, "step": 1530 }, { "epoch": 1.12, "grad_norm": 10.405439376831055, "learning_rate": 2.795933188090051e-05, "loss": 0.0721, "step": 1540 }, { "epoch": 1.13, "grad_norm": 1.5054512023925781, "learning_rate": 2.814088598402324e-05, "loss": 0.0837, "step": 1550 }, { "epoch": 1.13, "grad_norm": 15.655257225036621, "learning_rate": 2.832244008714597e-05, "loss": 0.099, "step": 1560 }, { "epoch": 1.14, "grad_norm": 8.51606559753418, "learning_rate": 2.85039941902687e-05, "loss": 0.1474, "step": 1570 }, { "epoch": 1.15, "grad_norm": 21.121200561523438, "learning_rate": 2.8685548293391433e-05, "loss": 0.1119, "step": 1580 }, { "epoch": 1.15, "grad_norm": 7.0633673667907715, "learning_rate": 2.8867102396514163e-05, "loss": 0.1265, "step": 1590 }, { "epoch": 1.16, "grad_norm": 35.018943786621094, "learning_rate": 2.9048656499636893e-05, "loss": 0.1585, "step": 1600 }, { "epoch": 1.17, "grad_norm": 12.88159465789795, "learning_rate": 2.9230210602759624e-05, "loss": 0.1685, "step": 1610 }, { "epoch": 1.18, "grad_norm": 15.151433944702148, "learning_rate": 2.9411764705882354e-05, "loss": 0.1472, "step": 1620 }, { "epoch": 1.18, "grad_norm": 4.823648452758789, "learning_rate": 2.9593318809005084e-05, "loss": 0.0755, "step": 1630 }, { "epoch": 1.19, "grad_norm": 8.589825630187988, "learning_rate": 2.9774872912127817e-05, "loss": 0.1005, "step": 1640 }, { "epoch": 1.2, "grad_norm": 11.609642028808594, "learning_rate": 2.9956427015250548e-05, "loss": 0.0907, "step": 1650 }, { "epoch": 1.21, "grad_norm": 12.496771812438965, "learning_rate": 3.0137981118373278e-05, "loss": 0.1049, "step": 1660 }, { "epoch": 1.21, "grad_norm": 9.315934181213379, "learning_rate": 3.0319535221496008e-05, "loss": 0.0993, "step": 1670 }, { "epoch": 1.22, "grad_norm": 11.899771690368652, "learning_rate": 3.0501089324618738e-05, "loss": 0.0933, "step": 1680 }, { "epoch": 1.23, "grad_norm": 6.324872970581055, "learning_rate": 3.068264342774147e-05, "loss": 0.1013, "step": 1690 }, { "epoch": 1.23, "grad_norm": 9.256946563720703, "learning_rate": 3.08641975308642e-05, "loss": 0.1089, "step": 1700 }, { "epoch": 1.24, "grad_norm": 3.401277780532837, "learning_rate": 3.104575163398693e-05, "loss": 0.1013, "step": 1710 }, { "epoch": 1.25, "grad_norm": 7.001280784606934, "learning_rate": 3.122730573710966e-05, "loss": 0.0752, "step": 1720 }, { "epoch": 1.26, "grad_norm": 5.906143665313721, "learning_rate": 3.140885984023239e-05, "loss": 0.0926, "step": 1730 }, { "epoch": 1.26, "grad_norm": 8.290616989135742, "learning_rate": 3.1590413943355126e-05, "loss": 0.0871, "step": 1740 }, { "epoch": 1.27, "grad_norm": 15.752880096435547, "learning_rate": 3.177196804647785e-05, "loss": 0.0794, "step": 1750 }, { "epoch": 1.28, "grad_norm": 11.03674030303955, "learning_rate": 3.1953522149600586e-05, "loss": 0.0961, "step": 1760 }, { "epoch": 1.28, "grad_norm": 13.24030876159668, "learning_rate": 3.213507625272331e-05, "loss": 0.089, "step": 1770 }, { "epoch": 1.29, "grad_norm": 22.704883575439453, "learning_rate": 3.2316630355846046e-05, "loss": 0.0917, "step": 1780 }, { "epoch": 1.3, "grad_norm": 4.330246925354004, "learning_rate": 3.249818445896877e-05, "loss": 0.118, "step": 1790 }, { "epoch": 1.31, "grad_norm": 12.068599700927734, "learning_rate": 3.2679738562091506e-05, "loss": 0.1086, "step": 1800 }, { "epoch": 1.31, "grad_norm": 6.073216438293457, "learning_rate": 3.286129266521423e-05, "loss": 0.1285, "step": 1810 }, { "epoch": 1.32, "grad_norm": 8.668619155883789, "learning_rate": 3.304284676833697e-05, "loss": 0.0784, "step": 1820 }, { "epoch": 1.33, "grad_norm": 2.0747952461242676, "learning_rate": 3.32244008714597e-05, "loss": 0.0958, "step": 1830 }, { "epoch": 1.34, "grad_norm": 21.166549682617188, "learning_rate": 3.340595497458243e-05, "loss": 0.1532, "step": 1840 }, { "epoch": 1.34, "grad_norm": 19.43235969543457, "learning_rate": 3.358750907770516e-05, "loss": 0.1373, "step": 1850 }, { "epoch": 1.35, "grad_norm": 6.905423164367676, "learning_rate": 3.376906318082789e-05, "loss": 0.1291, "step": 1860 }, { "epoch": 1.36, "grad_norm": 17.511754989624023, "learning_rate": 3.395061728395062e-05, "loss": 0.1032, "step": 1870 }, { "epoch": 1.36, "grad_norm": 3.903677225112915, "learning_rate": 3.413217138707335e-05, "loss": 0.0923, "step": 1880 }, { "epoch": 1.37, "grad_norm": 11.573185920715332, "learning_rate": 3.431372549019608e-05, "loss": 0.0783, "step": 1890 }, { "epoch": 1.38, "grad_norm": 11.51346492767334, "learning_rate": 3.4495279593318815e-05, "loss": 0.0799, "step": 1900 }, { "epoch": 1.39, "grad_norm": 3.603184223175049, "learning_rate": 3.467683369644154e-05, "loss": 0.0777, "step": 1910 }, { "epoch": 1.39, "grad_norm": 19.509370803833008, "learning_rate": 3.4858387799564275e-05, "loss": 0.1354, "step": 1920 }, { "epoch": 1.4, "grad_norm": 16.0355281829834, "learning_rate": 3.5039941902687e-05, "loss": 0.246, "step": 1930 }, { "epoch": 1.41, "grad_norm": 13.758955955505371, "learning_rate": 3.5221496005809735e-05, "loss": 0.0844, "step": 1940 }, { "epoch": 1.42, "grad_norm": 13.882157325744629, "learning_rate": 3.540305010893246e-05, "loss": 0.0778, "step": 1950 }, { "epoch": 1.42, "grad_norm": 15.574724197387695, "learning_rate": 3.5584604212055195e-05, "loss": 0.0828, "step": 1960 }, { "epoch": 1.43, "grad_norm": 8.679441452026367, "learning_rate": 3.5766158315177926e-05, "loss": 0.0776, "step": 1970 }, { "epoch": 1.44, "grad_norm": 53.65000534057617, "learning_rate": 3.5947712418300656e-05, "loss": 0.0931, "step": 1980 }, { "epoch": 1.44, "grad_norm": 15.00185775756836, "learning_rate": 3.6129266521423386e-05, "loss": 0.1215, "step": 1990 }, { "epoch": 1.45, "grad_norm": 9.312851905822754, "learning_rate": 3.6310820624546116e-05, "loss": 0.103, "step": 2000 }, { "epoch": 1.46, "grad_norm": 9.61681842803955, "learning_rate": 3.6492374727668846e-05, "loss": 0.0601, "step": 2010 }, { "epoch": 1.47, "grad_norm": 7.561996936798096, "learning_rate": 3.6673928830791576e-05, "loss": 0.0572, "step": 2020 }, { "epoch": 1.47, "grad_norm": 8.18752384185791, "learning_rate": 3.6855482933914306e-05, "loss": 0.1414, "step": 2030 }, { "epoch": 1.48, "grad_norm": 5.567200660705566, "learning_rate": 3.7037037037037037e-05, "loss": 0.0717, "step": 2040 }, { "epoch": 1.49, "grad_norm": 2.777528762817383, "learning_rate": 3.721859114015977e-05, "loss": 0.0535, "step": 2050 }, { "epoch": 1.5, "grad_norm": 16.293184280395508, "learning_rate": 3.74001452432825e-05, "loss": 0.0705, "step": 2060 }, { "epoch": 1.5, "grad_norm": 6.650923252105713, "learning_rate": 3.758169934640523e-05, "loss": 0.0741, "step": 2070 }, { "epoch": 1.51, "grad_norm": 11.978032112121582, "learning_rate": 3.7763253449527964e-05, "loss": 0.0506, "step": 2080 }, { "epoch": 1.52, "grad_norm": 11.749996185302734, "learning_rate": 3.7944807552650694e-05, "loss": 0.0926, "step": 2090 }, { "epoch": 1.52, "grad_norm": 8.854286193847656, "learning_rate": 3.8126361655773424e-05, "loss": 0.0767, "step": 2100 }, { "epoch": 1.53, "grad_norm": 5.265554428100586, "learning_rate": 3.8307915758896154e-05, "loss": 0.0709, "step": 2110 }, { "epoch": 1.54, "grad_norm": 8.641592979431152, "learning_rate": 3.8489469862018884e-05, "loss": 0.0957, "step": 2120 }, { "epoch": 1.55, "grad_norm": 15.576934814453125, "learning_rate": 3.8671023965141615e-05, "loss": 0.0742, "step": 2130 }, { "epoch": 1.55, "grad_norm": 5.454996109008789, "learning_rate": 3.8852578068264345e-05, "loss": 0.0745, "step": 2140 }, { "epoch": 1.56, "grad_norm": 7.110592842102051, "learning_rate": 3.9034132171387075e-05, "loss": 0.0785, "step": 2150 }, { "epoch": 1.57, "grad_norm": 9.214689254760742, "learning_rate": 3.9215686274509805e-05, "loss": 0.0619, "step": 2160 }, { "epoch": 1.58, "grad_norm": 3.3824822902679443, "learning_rate": 3.9397240377632535e-05, "loss": 0.1681, "step": 2170 }, { "epoch": 1.58, "grad_norm": 4.873193264007568, "learning_rate": 3.9578794480755265e-05, "loss": 0.0957, "step": 2180 }, { "epoch": 1.59, "grad_norm": 14.703625679016113, "learning_rate": 3.9760348583877995e-05, "loss": 0.0648, "step": 2190 }, { "epoch": 1.6, "grad_norm": 13.431695938110352, "learning_rate": 3.9941902687000726e-05, "loss": 0.0711, "step": 2200 }, { "epoch": 1.6, "grad_norm": 13.710531234741211, "learning_rate": 4.012345679012346e-05, "loss": 0.0867, "step": 2210 }, { "epoch": 1.61, "grad_norm": 9.267621994018555, "learning_rate": 4.0305010893246186e-05, "loss": 0.0975, "step": 2220 }, { "epoch": 1.62, "grad_norm": 7.971125602722168, "learning_rate": 4.048656499636892e-05, "loss": 0.0535, "step": 2230 }, { "epoch": 1.63, "grad_norm": 20.38298988342285, "learning_rate": 4.066811909949165e-05, "loss": 0.0907, "step": 2240 }, { "epoch": 1.63, "grad_norm": 13.68902587890625, "learning_rate": 4.084967320261438e-05, "loss": 0.0778, "step": 2250 }, { "epoch": 1.64, "grad_norm": 4.450131893157959, "learning_rate": 4.103122730573711e-05, "loss": 0.0623, "step": 2260 }, { "epoch": 1.65, "grad_norm": 3.162184238433838, "learning_rate": 4.121278140885984e-05, "loss": 0.0746, "step": 2270 }, { "epoch": 1.66, "grad_norm": 16.87812042236328, "learning_rate": 4.1394335511982573e-05, "loss": 0.083, "step": 2280 }, { "epoch": 1.66, "grad_norm": 6.631977081298828, "learning_rate": 4.1575889615105304e-05, "loss": 0.0634, "step": 2290 }, { "epoch": 1.67, "grad_norm": 13.702621459960938, "learning_rate": 4.1757443718228034e-05, "loss": 0.1152, "step": 2300 }, { "epoch": 1.68, "grad_norm": 9.709258079528809, "learning_rate": 4.193899782135077e-05, "loss": 0.0727, "step": 2310 }, { "epoch": 1.68, "grad_norm": 15.56152057647705, "learning_rate": 4.2120551924473494e-05, "loss": 0.0734, "step": 2320 }, { "epoch": 1.69, "grad_norm": 14.451997756958008, "learning_rate": 4.230210602759623e-05, "loss": 0.0906, "step": 2330 }, { "epoch": 1.7, "grad_norm": 3.7484872341156006, "learning_rate": 4.2483660130718954e-05, "loss": 0.0724, "step": 2340 }, { "epoch": 1.71, "grad_norm": 7.64939546585083, "learning_rate": 4.266521423384169e-05, "loss": 0.0753, "step": 2350 }, { "epoch": 1.71, "grad_norm": 5.493325710296631, "learning_rate": 4.2846768336964415e-05, "loss": 0.0781, "step": 2360 }, { "epoch": 1.72, "grad_norm": 23.490360260009766, "learning_rate": 4.302832244008715e-05, "loss": 0.0908, "step": 2370 }, { "epoch": 1.73, "grad_norm": 10.67781925201416, "learning_rate": 4.3209876543209875e-05, "loss": 0.0996, "step": 2380 }, { "epoch": 1.74, "grad_norm": 1.9585663080215454, "learning_rate": 4.339143064633261e-05, "loss": 0.0895, "step": 2390 }, { "epoch": 1.74, "grad_norm": 17.196063995361328, "learning_rate": 4.357298474945534e-05, "loss": 0.0667, "step": 2400 }, { "epoch": 1.75, "grad_norm": 18.92582130432129, "learning_rate": 4.375453885257807e-05, "loss": 0.0947, "step": 2410 }, { "epoch": 1.76, "grad_norm": 17.93027114868164, "learning_rate": 4.39360929557008e-05, "loss": 0.0893, "step": 2420 }, { "epoch": 1.76, "grad_norm": 4.2530670166015625, "learning_rate": 4.411764705882353e-05, "loss": 0.0736, "step": 2430 }, { "epoch": 1.77, "grad_norm": 10.697159767150879, "learning_rate": 4.429920116194626e-05, "loss": 0.0914, "step": 2440 }, { "epoch": 1.78, "grad_norm": 7.323067665100098, "learning_rate": 4.448075526506899e-05, "loss": 0.0936, "step": 2450 }, { "epoch": 1.79, "grad_norm": 6.918476581573486, "learning_rate": 4.466230936819172e-05, "loss": 0.1106, "step": 2460 }, { "epoch": 1.79, "grad_norm": 11.204655647277832, "learning_rate": 4.484386347131445e-05, "loss": 0.062, "step": 2470 }, { "epoch": 1.8, "grad_norm": 12.258004188537598, "learning_rate": 4.502541757443718e-05, "loss": 0.0533, "step": 2480 }, { "epoch": 1.81, "grad_norm": 5.304686069488525, "learning_rate": 4.520697167755992e-05, "loss": 0.0817, "step": 2490 }, { "epoch": 1.81, "grad_norm": 12.656023025512695, "learning_rate": 4.538852578068264e-05, "loss": 0.0825, "step": 2500 }, { "epoch": 1.82, "grad_norm": 2.970365047454834, "learning_rate": 4.557007988380538e-05, "loss": 0.0568, "step": 2510 }, { "epoch": 1.83, "grad_norm": 20.127803802490234, "learning_rate": 4.5751633986928104e-05, "loss": 0.0471, "step": 2520 }, { "epoch": 1.84, "grad_norm": 12.173340797424316, "learning_rate": 4.593318809005084e-05, "loss": 0.092, "step": 2530 }, { "epoch": 1.84, "grad_norm": 13.151833534240723, "learning_rate": 4.6114742193173564e-05, "loss": 0.0767, "step": 2540 }, { "epoch": 1.85, "grad_norm": 10.405213356018066, "learning_rate": 4.62962962962963e-05, "loss": 0.0629, "step": 2550 }, { "epoch": 1.86, "grad_norm": 3.035775899887085, "learning_rate": 4.647785039941903e-05, "loss": 0.0696, "step": 2560 }, { "epoch": 1.87, "grad_norm": 11.858610153198242, "learning_rate": 4.665940450254176e-05, "loss": 0.1054, "step": 2570 }, { "epoch": 1.87, "grad_norm": 16.282440185546875, "learning_rate": 4.684095860566449e-05, "loss": 0.0718, "step": 2580 }, { "epoch": 1.88, "grad_norm": 11.148844718933105, "learning_rate": 4.702251270878722e-05, "loss": 0.0664, "step": 2590 }, { "epoch": 1.89, "grad_norm": 6.528799057006836, "learning_rate": 4.720406681190995e-05, "loss": 0.0611, "step": 2600 }, { "epoch": 1.89, "grad_norm": 3.7072341442108154, "learning_rate": 4.738562091503268e-05, "loss": 0.043, "step": 2610 }, { "epoch": 1.9, "grad_norm": 16.07568359375, "learning_rate": 4.756717501815541e-05, "loss": 0.0739, "step": 2620 }, { "epoch": 1.91, "grad_norm": 13.207526206970215, "learning_rate": 4.774872912127814e-05, "loss": 0.0474, "step": 2630 }, { "epoch": 1.92, "grad_norm": 7.959559917449951, "learning_rate": 4.793028322440087e-05, "loss": 0.0883, "step": 2640 }, { "epoch": 1.92, "grad_norm": 9.581097602844238, "learning_rate": 4.811183732752361e-05, "loss": 0.0555, "step": 2650 }, { "epoch": 1.93, "grad_norm": 7.399395942687988, "learning_rate": 4.829339143064633e-05, "loss": 0.0877, "step": 2660 }, { "epoch": 1.94, "grad_norm": 1.6540353298187256, "learning_rate": 4.847494553376907e-05, "loss": 0.0745, "step": 2670 }, { "epoch": 1.95, "grad_norm": 8.871071815490723, "learning_rate": 4.865649963689179e-05, "loss": 0.0831, "step": 2680 }, { "epoch": 1.95, "grad_norm": 6.940814971923828, "learning_rate": 4.883805374001453e-05, "loss": 0.0736, "step": 2690 }, { "epoch": 1.96, "grad_norm": 5.259904861450195, "learning_rate": 4.901960784313725e-05, "loss": 0.1023, "step": 2700 }, { "epoch": 1.97, "grad_norm": 32.73249435424805, "learning_rate": 4.920116194625999e-05, "loss": 0.0767, "step": 2710 }, { "epoch": 1.97, "grad_norm": 13.280170440673828, "learning_rate": 4.938271604938271e-05, "loss": 0.1112, "step": 2720 }, { "epoch": 1.98, "grad_norm": 14.727370262145996, "learning_rate": 4.956427015250545e-05, "loss": 0.0511, "step": 2730 }, { "epoch": 1.99, "grad_norm": 30.67513084411621, "learning_rate": 4.974582425562818e-05, "loss": 0.0814, "step": 2740 }, { "epoch": 2.0, "grad_norm": 9.064040184020996, "learning_rate": 4.992737835875091e-05, "loss": 0.0917, "step": 2750 }, { "epoch": 2.0, "eval_accuracy": 0.9896310750102101, "eval_f1": 0.9760394274629057, "eval_loss": 0.03418319672346115, "eval_precision": 0.9803054239073197, "eval_recall": 0.9718103988306536, "eval_roc_auc": 0.9992664428625956, "eval_runtime": 387.256, "eval_samples_per_second": 227.622, "eval_steps_per_second": 14.228, "step": 2755 }, { "epoch": 2.0, "grad_norm": 4.060576915740967, "learning_rate": 4.9987896393125154e-05, "loss": 0.0802, "step": 2760 }, { "epoch": 2.01, "grad_norm": 5.140890121459961, "learning_rate": 4.996772371500041e-05, "loss": 0.0407, "step": 2770 }, { "epoch": 2.02, "grad_norm": 19.039766311645508, "learning_rate": 4.9947551036875656e-05, "loss": 0.0489, "step": 2780 }, { "epoch": 2.03, "grad_norm": 12.916511535644531, "learning_rate": 4.992737835875091e-05, "loss": 0.0472, "step": 2790 }, { "epoch": 2.03, "grad_norm": 1.2582281827926636, "learning_rate": 4.9907205680626165e-05, "loss": 0.0651, "step": 2800 }, { "epoch": 2.04, "grad_norm": 4.840940475463867, "learning_rate": 4.988703300250141e-05, "loss": 0.0578, "step": 2810 }, { "epoch": 2.05, "grad_norm": 5.674259185791016, "learning_rate": 4.986686032437667e-05, "loss": 0.0371, "step": 2820 }, { "epoch": 2.05, "grad_norm": 5.847988128662109, "learning_rate": 4.9846687646251915e-05, "loss": 0.0404, "step": 2830 }, { "epoch": 2.06, "grad_norm": 13.216257095336914, "learning_rate": 4.982651496812717e-05, "loss": 0.0904, "step": 2840 }, { "epoch": 2.07, "grad_norm": 9.643535614013672, "learning_rate": 4.9806342290002424e-05, "loss": 0.0462, "step": 2850 }, { "epoch": 2.08, "grad_norm": 10.265970230102539, "learning_rate": 4.978616961187768e-05, "loss": 0.0436, "step": 2860 }, { "epoch": 2.08, "grad_norm": 16.27211570739746, "learning_rate": 4.9765996933752926e-05, "loss": 0.0819, "step": 2870 }, { "epoch": 2.09, "grad_norm": 10.045124053955078, "learning_rate": 4.974582425562818e-05, "loss": 0.138, "step": 2880 }, { "epoch": 2.1, "grad_norm": 6.49107551574707, "learning_rate": 4.972565157750343e-05, "loss": 0.0705, "step": 2890 }, { "epoch": 2.11, "grad_norm": 7.387770175933838, "learning_rate": 4.970547889937868e-05, "loss": 0.0604, "step": 2900 }, { "epoch": 2.11, "grad_norm": 7.549914360046387, "learning_rate": 4.968530622125394e-05, "loss": 0.0517, "step": 2910 }, { "epoch": 2.12, "grad_norm": 16.87873077392578, "learning_rate": 4.966513354312919e-05, "loss": 0.0388, "step": 2920 }, { "epoch": 2.13, "grad_norm": 16.325939178466797, "learning_rate": 4.964496086500444e-05, "loss": 0.1262, "step": 2930 }, { "epoch": 2.13, "grad_norm": 5.032786846160889, "learning_rate": 4.9624788186879693e-05, "loss": 0.0517, "step": 2940 }, { "epoch": 2.14, "grad_norm": 2.5657832622528076, "learning_rate": 4.960461550875494e-05, "loss": 0.0199, "step": 2950 }, { "epoch": 2.15, "grad_norm": 37.460018157958984, "learning_rate": 4.9584442830630196e-05, "loss": 0.0367, "step": 2960 }, { "epoch": 2.16, "grad_norm": 33.25373458862305, "learning_rate": 4.956427015250545e-05, "loss": 0.0636, "step": 2970 }, { "epoch": 2.16, "grad_norm": 18.634830474853516, "learning_rate": 4.9544097474380705e-05, "loss": 0.0746, "step": 2980 }, { "epoch": 2.17, "grad_norm": 13.56174087524414, "learning_rate": 4.952392479625595e-05, "loss": 0.1066, "step": 2990 }, { "epoch": 2.18, "grad_norm": 9.151412963867188, "learning_rate": 4.950375211813121e-05, "loss": 0.0841, "step": 3000 }, { "epoch": 2.19, "grad_norm": 0.9245597124099731, "learning_rate": 4.9483579440006454e-05, "loss": 0.0327, "step": 3010 }, { "epoch": 2.19, "grad_norm": 25.459856033325195, "learning_rate": 4.946340676188171e-05, "loss": 0.0773, "step": 3020 }, { "epoch": 2.2, "grad_norm": 9.643467903137207, "learning_rate": 4.944323408375696e-05, "loss": 0.0392, "step": 3030 }, { "epoch": 2.21, "grad_norm": 4.589292049407959, "learning_rate": 4.942306140563222e-05, "loss": 0.0501, "step": 3040 }, { "epoch": 2.21, "grad_norm": 8.616634368896484, "learning_rate": 4.9402888727507465e-05, "loss": 0.0514, "step": 3050 }, { "epoch": 2.22, "grad_norm": 2.33439040184021, "learning_rate": 4.938271604938271e-05, "loss": 0.0411, "step": 3060 }, { "epoch": 2.23, "grad_norm": 8.900259971618652, "learning_rate": 4.936254337125797e-05, "loss": 0.0449, "step": 3070 }, { "epoch": 2.24, "grad_norm": 15.918954849243164, "learning_rate": 4.934237069313322e-05, "loss": 0.0539, "step": 3080 }, { "epoch": 2.24, "grad_norm": 23.657480239868164, "learning_rate": 4.9322198015008477e-05, "loss": 0.0574, "step": 3090 }, { "epoch": 2.25, "grad_norm": 13.110893249511719, "learning_rate": 4.930202533688373e-05, "loss": 0.0938, "step": 3100 }, { "epoch": 2.26, "grad_norm": 1.8193144798278809, "learning_rate": 4.928185265875898e-05, "loss": 0.0607, "step": 3110 }, { "epoch": 2.26, "grad_norm": 5.928867340087891, "learning_rate": 4.9261679980634226e-05, "loss": 0.0611, "step": 3120 }, { "epoch": 2.27, "grad_norm": 10.63883113861084, "learning_rate": 4.924150730250948e-05, "loss": 0.0371, "step": 3130 }, { "epoch": 2.28, "grad_norm": 7.217007637023926, "learning_rate": 4.9221334624384735e-05, "loss": 0.0453, "step": 3140 }, { "epoch": 2.29, "grad_norm": 22.366975784301758, "learning_rate": 4.920116194625999e-05, "loss": 0.1012, "step": 3150 }, { "epoch": 2.29, "grad_norm": 14.940516471862793, "learning_rate": 4.9180989268135244e-05, "loss": 0.0799, "step": 3160 }, { "epoch": 2.3, "grad_norm": 3.766080617904663, "learning_rate": 4.916081659001049e-05, "loss": 0.0644, "step": 3170 }, { "epoch": 2.31, "grad_norm": 17.51938819885254, "learning_rate": 4.914064391188574e-05, "loss": 0.0565, "step": 3180 }, { "epoch": 2.32, "grad_norm": 3.683283567428589, "learning_rate": 4.9120471233760994e-05, "loss": 0.0537, "step": 3190 }, { "epoch": 2.32, "grad_norm": 9.450156211853027, "learning_rate": 4.910029855563625e-05, "loss": 0.0537, "step": 3200 }, { "epoch": 2.33, "grad_norm": 7.481932640075684, "learning_rate": 4.90801258775115e-05, "loss": 0.0266, "step": 3210 }, { "epoch": 2.34, "grad_norm": 4.7421488761901855, "learning_rate": 4.905995319938676e-05, "loss": 0.0399, "step": 3220 }, { "epoch": 2.34, "grad_norm": 42.04796600341797, "learning_rate": 4.9039780521262005e-05, "loss": 0.0667, "step": 3230 }, { "epoch": 2.35, "grad_norm": 2.9122283458709717, "learning_rate": 4.901960784313725e-05, "loss": 0.0422, "step": 3240 }, { "epoch": 2.36, "grad_norm": 8.037728309631348, "learning_rate": 4.899943516501251e-05, "loss": 0.0922, "step": 3250 }, { "epoch": 2.37, "grad_norm": 1.3274216651916504, "learning_rate": 4.897926248688776e-05, "loss": 0.0563, "step": 3260 }, { "epoch": 2.37, "grad_norm": 3.631781816482544, "learning_rate": 4.8959089808763016e-05, "loss": 0.054, "step": 3270 }, { "epoch": 2.38, "grad_norm": 4.287731170654297, "learning_rate": 4.893891713063827e-05, "loss": 0.0523, "step": 3280 }, { "epoch": 2.39, "grad_norm": 0.8176174163818359, "learning_rate": 4.891874445251352e-05, "loss": 0.052, "step": 3290 }, { "epoch": 2.4, "grad_norm": 12.366915702819824, "learning_rate": 4.8898571774388766e-05, "loss": 0.0595, "step": 3300 }, { "epoch": 2.4, "grad_norm": 15.8939790725708, "learning_rate": 4.887839909626402e-05, "loss": 0.0665, "step": 3310 }, { "epoch": 2.41, "grad_norm": 17.355178833007812, "learning_rate": 4.8858226418139275e-05, "loss": 0.058, "step": 3320 }, { "epoch": 2.42, "grad_norm": 2.637277603149414, "learning_rate": 4.883805374001453e-05, "loss": 0.033, "step": 3330 }, { "epoch": 2.42, "grad_norm": 14.758194923400879, "learning_rate": 4.8817881061889784e-05, "loss": 0.0607, "step": 3340 }, { "epoch": 2.43, "grad_norm": 13.589420318603516, "learning_rate": 4.879770838376503e-05, "loss": 0.0328, "step": 3350 }, { "epoch": 2.44, "grad_norm": 15.758061408996582, "learning_rate": 4.877753570564028e-05, "loss": 0.0458, "step": 3360 }, { "epoch": 2.45, "grad_norm": 16.480510711669922, "learning_rate": 4.8757363027515534e-05, "loss": 0.0584, "step": 3370 }, { "epoch": 2.45, "grad_norm": 2.5956544876098633, "learning_rate": 4.873719034939079e-05, "loss": 0.0544, "step": 3380 }, { "epoch": 2.46, "grad_norm": 13.941973686218262, "learning_rate": 4.871701767126604e-05, "loss": 0.0625, "step": 3390 }, { "epoch": 2.47, "grad_norm": 9.241766929626465, "learning_rate": 4.86968449931413e-05, "loss": 0.0501, "step": 3400 }, { "epoch": 2.48, "grad_norm": 1.4883322715759277, "learning_rate": 4.8676672315016545e-05, "loss": 0.0711, "step": 3410 }, { "epoch": 2.48, "grad_norm": 2.9974913597106934, "learning_rate": 4.865649963689179e-05, "loss": 0.0285, "step": 3420 }, { "epoch": 2.49, "grad_norm": 5.6022629737854, "learning_rate": 4.863632695876705e-05, "loss": 0.0886, "step": 3430 }, { "epoch": 2.5, "grad_norm": 2.3902041912078857, "learning_rate": 4.86161542806423e-05, "loss": 0.051, "step": 3440 }, { "epoch": 2.5, "grad_norm": 2.2185537815093994, "learning_rate": 4.8595981602517556e-05, "loss": 0.035, "step": 3450 }, { "epoch": 2.51, "grad_norm": 24.16321563720703, "learning_rate": 4.857580892439281e-05, "loss": 0.0456, "step": 3460 }, { "epoch": 2.52, "grad_norm": 10.79037857055664, "learning_rate": 4.855563624626805e-05, "loss": 0.053, "step": 3470 }, { "epoch": 2.53, "grad_norm": 6.468301296234131, "learning_rate": 4.8535463568143306e-05, "loss": 0.0371, "step": 3480 }, { "epoch": 2.53, "grad_norm": 4.342319488525391, "learning_rate": 4.851529089001856e-05, "loss": 0.0349, "step": 3490 }, { "epoch": 2.54, "grad_norm": 1.0654453039169312, "learning_rate": 4.8495118211893815e-05, "loss": 0.0522, "step": 3500 }, { "epoch": 2.55, "grad_norm": 0.6455519199371338, "learning_rate": 4.847494553376907e-05, "loss": 0.0501, "step": 3510 }, { "epoch": 2.56, "grad_norm": 6.535991668701172, "learning_rate": 4.8454772855644324e-05, "loss": 0.0381, "step": 3520 }, { "epoch": 2.56, "grad_norm": 15.82268238067627, "learning_rate": 4.8434600177519565e-05, "loss": 0.0552, "step": 3530 }, { "epoch": 2.57, "grad_norm": 6.626350402832031, "learning_rate": 4.841442749939482e-05, "loss": 0.0605, "step": 3540 }, { "epoch": 2.58, "grad_norm": 9.876846313476562, "learning_rate": 4.8394254821270074e-05, "loss": 0.064, "step": 3550 }, { "epoch": 2.58, "grad_norm": 9.681201934814453, "learning_rate": 4.837408214314533e-05, "loss": 0.0469, "step": 3560 }, { "epoch": 2.59, "grad_norm": 12.74880313873291, "learning_rate": 4.835390946502058e-05, "loss": 0.0482, "step": 3570 }, { "epoch": 2.6, "grad_norm": 18.379024505615234, "learning_rate": 4.833373678689583e-05, "loss": 0.0353, "step": 3580 }, { "epoch": 2.61, "grad_norm": 17.69583511352539, "learning_rate": 4.831356410877108e-05, "loss": 0.0488, "step": 3590 }, { "epoch": 2.61, "grad_norm": 11.161755561828613, "learning_rate": 4.829339143064633e-05, "loss": 0.0497, "step": 3600 }, { "epoch": 2.62, "grad_norm": 1.346876621246338, "learning_rate": 4.827321875252159e-05, "loss": 0.0477, "step": 3610 }, { "epoch": 2.63, "grad_norm": 15.771431922912598, "learning_rate": 4.825304607439684e-05, "loss": 0.0409, "step": 3620 }, { "epoch": 2.64, "grad_norm": 14.307646751403809, "learning_rate": 4.8232873396272096e-05, "loss": 0.041, "step": 3630 }, { "epoch": 2.64, "grad_norm": 0.2704331874847412, "learning_rate": 4.8212700718147343e-05, "loss": 0.0552, "step": 3640 }, { "epoch": 2.65, "grad_norm": 8.454325675964355, "learning_rate": 4.819252804002259e-05, "loss": 0.0527, "step": 3650 }, { "epoch": 2.66, "grad_norm": 1.2138357162475586, "learning_rate": 4.8172355361897846e-05, "loss": 0.0429, "step": 3660 }, { "epoch": 2.66, "grad_norm": 11.901808738708496, "learning_rate": 4.81521826837731e-05, "loss": 0.0517, "step": 3670 }, { "epoch": 2.67, "grad_norm": 14.237427711486816, "learning_rate": 4.8132010005648354e-05, "loss": 0.0435, "step": 3680 }, { "epoch": 2.68, "grad_norm": 0.18843147158622742, "learning_rate": 4.811183732752361e-05, "loss": 0.0335, "step": 3690 }, { "epoch": 2.69, "grad_norm": 0.303036630153656, "learning_rate": 4.809166464939886e-05, "loss": 0.0432, "step": 3700 }, { "epoch": 2.69, "grad_norm": 25.766660690307617, "learning_rate": 4.8071491971274104e-05, "loss": 0.0481, "step": 3710 }, { "epoch": 2.7, "grad_norm": 16.13401222229004, "learning_rate": 4.805131929314936e-05, "loss": 0.0807, "step": 3720 }, { "epoch": 2.71, "grad_norm": 16.700721740722656, "learning_rate": 4.803114661502461e-05, "loss": 0.0575, "step": 3730 }, { "epoch": 2.72, "grad_norm": 1.0864169597625732, "learning_rate": 4.801097393689987e-05, "loss": 0.0522, "step": 3740 }, { "epoch": 2.72, "grad_norm": 6.126308441162109, "learning_rate": 4.799080125877512e-05, "loss": 0.0577, "step": 3750 }, { "epoch": 2.73, "grad_norm": 2.5702388286590576, "learning_rate": 4.797062858065037e-05, "loss": 0.0711, "step": 3760 }, { "epoch": 2.74, "grad_norm": 4.559757709503174, "learning_rate": 4.795045590252562e-05, "loss": 0.042, "step": 3770 }, { "epoch": 2.74, "grad_norm": 20.285289764404297, "learning_rate": 4.793028322440087e-05, "loss": 0.0647, "step": 3780 }, { "epoch": 2.75, "grad_norm": 14.125584602355957, "learning_rate": 4.7910110546276126e-05, "loss": 0.0396, "step": 3790 }, { "epoch": 2.76, "grad_norm": 6.698452949523926, "learning_rate": 4.788993786815138e-05, "loss": 0.0107, "step": 3800 }, { "epoch": 2.77, "grad_norm": 12.645397186279297, "learning_rate": 4.786976519002663e-05, "loss": 0.0403, "step": 3810 }, { "epoch": 2.77, "grad_norm": 8.991414070129395, "learning_rate": 4.784959251190188e-05, "loss": 0.049, "step": 3820 }, { "epoch": 2.78, "grad_norm": 2.049801826477051, "learning_rate": 4.782941983377713e-05, "loss": 0.0919, "step": 3830 }, { "epoch": 2.79, "grad_norm": 3.793942928314209, "learning_rate": 4.7809247155652385e-05, "loss": 0.0557, "step": 3840 }, { "epoch": 2.79, "grad_norm": 19.353565216064453, "learning_rate": 4.778907447752764e-05, "loss": 0.0442, "step": 3850 }, { "epoch": 2.8, "grad_norm": 8.632854461669922, "learning_rate": 4.7768901799402894e-05, "loss": 0.0458, "step": 3860 }, { "epoch": 2.81, "grad_norm": 25.426410675048828, "learning_rate": 4.774872912127814e-05, "loss": 0.0329, "step": 3870 }, { "epoch": 2.82, "grad_norm": 5.492660999298096, "learning_rate": 4.7728556443153396e-05, "loss": 0.0371, "step": 3880 }, { "epoch": 2.82, "grad_norm": 17.053266525268555, "learning_rate": 4.7708383765028644e-05, "loss": 0.0354, "step": 3890 }, { "epoch": 2.83, "grad_norm": 23.848909378051758, "learning_rate": 4.76882110869039e-05, "loss": 0.0639, "step": 3900 }, { "epoch": 2.84, "grad_norm": 2.4269025325775146, "learning_rate": 4.766803840877915e-05, "loss": 0.0476, "step": 3910 }, { "epoch": 2.85, "grad_norm": 8.225174903869629, "learning_rate": 4.764786573065441e-05, "loss": 0.0682, "step": 3920 }, { "epoch": 2.85, "grad_norm": 5.187263488769531, "learning_rate": 4.7627693052529655e-05, "loss": 0.0318, "step": 3930 }, { "epoch": 2.86, "grad_norm": 3.262751817703247, "learning_rate": 4.760752037440491e-05, "loss": 0.0415, "step": 3940 }, { "epoch": 2.87, "grad_norm": 6.393750190734863, "learning_rate": 4.758734769628016e-05, "loss": 0.0454, "step": 3950 }, { "epoch": 2.87, "grad_norm": 15.57343578338623, "learning_rate": 4.756717501815541e-05, "loss": 0.055, "step": 3960 }, { "epoch": 2.88, "grad_norm": 6.782766819000244, "learning_rate": 4.7547002340030666e-05, "loss": 0.0253, "step": 3970 }, { "epoch": 2.89, "grad_norm": 9.348075866699219, "learning_rate": 4.752682966190592e-05, "loss": 0.0761, "step": 3980 }, { "epoch": 2.9, "grad_norm": 6.6344146728515625, "learning_rate": 4.750665698378117e-05, "loss": 0.0232, "step": 3990 }, { "epoch": 2.9, "grad_norm": 9.608769416809082, "learning_rate": 4.748648430565642e-05, "loss": 0.0657, "step": 4000 }, { "epoch": 2.91, "grad_norm": 8.99563980102539, "learning_rate": 4.746631162753167e-05, "loss": 0.0345, "step": 4010 }, { "epoch": 2.92, "grad_norm": 33.83638381958008, "learning_rate": 4.7446138949406925e-05, "loss": 0.1342, "step": 4020 }, { "epoch": 2.93, "grad_norm": 0.5134745836257935, "learning_rate": 4.742596627128218e-05, "loss": 0.0923, "step": 4030 }, { "epoch": 2.93, "grad_norm": 5.171667575836182, "learning_rate": 4.7405793593157434e-05, "loss": 0.0384, "step": 4040 }, { "epoch": 2.94, "grad_norm": 7.891140937805176, "learning_rate": 4.738562091503268e-05, "loss": 0.0369, "step": 4050 }, { "epoch": 2.95, "grad_norm": 11.524323463439941, "learning_rate": 4.7365448236907936e-05, "loss": 0.041, "step": 4060 }, { "epoch": 2.95, "grad_norm": 3.1125144958496094, "learning_rate": 4.7345275558783184e-05, "loss": 0.0292, "step": 4070 }, { "epoch": 2.96, "grad_norm": 1.6844711303710938, "learning_rate": 4.732510288065844e-05, "loss": 0.0572, "step": 4080 }, { "epoch": 2.97, "grad_norm": 10.969454765319824, "learning_rate": 4.730493020253369e-05, "loss": 0.0367, "step": 4090 }, { "epoch": 2.98, "grad_norm": 1.6989827156066895, "learning_rate": 4.728475752440894e-05, "loss": 0.0284, "step": 4100 }, { "epoch": 2.98, "grad_norm": 14.795607566833496, "learning_rate": 4.7264584846284195e-05, "loss": 0.0299, "step": 4110 }, { "epoch": 2.99, "grad_norm": 0.03458476439118385, "learning_rate": 4.724441216815945e-05, "loss": 0.0507, "step": 4120 }, { "epoch": 3.0, "grad_norm": 3.454913377761841, "learning_rate": 4.72242394900347e-05, "loss": 0.0291, "step": 4130 }, { "epoch": 3.0, "eval_accuracy": 0.9940100739665109, "eval_f1": 0.9862807254586083, "eval_loss": 0.016116181388497353, "eval_precision": 0.9818416968442835, "eval_recall": 0.9907600751722698, "eval_roc_auc": 0.9997706287577176, "eval_runtime": 389.701, "eval_samples_per_second": 226.194, "eval_steps_per_second": 14.139, "step": 4132 }, { "epoch": 3.01, "grad_norm": 7.517953872680664, "learning_rate": 4.720406681190995e-05, "loss": 0.0522, "step": 4140 }, { "epoch": 3.01, "grad_norm": 1.9929357767105103, "learning_rate": 4.7183894133785206e-05, "loss": 0.0338, "step": 4150 }, { "epoch": 3.02, "grad_norm": 10.290563583374023, "learning_rate": 4.7163721455660454e-05, "loss": 0.055, "step": 4160 }, { "epoch": 3.03, "grad_norm": 6.110312461853027, "learning_rate": 4.714354877753571e-05, "loss": 0.0607, "step": 4170 }, { "epoch": 3.03, "grad_norm": 1.5012456178665161, "learning_rate": 4.712337609941096e-05, "loss": 0.0353, "step": 4180 }, { "epoch": 3.04, "grad_norm": 0.7207418084144592, "learning_rate": 4.710320342128621e-05, "loss": 0.0183, "step": 4190 }, { "epoch": 3.05, "grad_norm": 15.605572700500488, "learning_rate": 4.7083030743161465e-05, "loss": 0.0298, "step": 4200 }, { "epoch": 3.06, "grad_norm": 0.7380584478378296, "learning_rate": 4.706285806503672e-05, "loss": 0.0274, "step": 4210 }, { "epoch": 3.06, "grad_norm": 0.27790164947509766, "learning_rate": 4.704268538691197e-05, "loss": 0.0217, "step": 4220 }, { "epoch": 3.07, "grad_norm": 15.396061897277832, "learning_rate": 4.702251270878722e-05, "loss": 0.0394, "step": 4230 }, { "epoch": 3.08, "grad_norm": 7.82650899887085, "learning_rate": 4.7002340030662476e-05, "loss": 0.0251, "step": 4240 }, { "epoch": 3.09, "grad_norm": 15.73935317993164, "learning_rate": 4.6982167352537723e-05, "loss": 0.0552, "step": 4250 }, { "epoch": 3.09, "grad_norm": 5.134984016418457, "learning_rate": 4.696199467441298e-05, "loss": 0.0308, "step": 4260 }, { "epoch": 3.1, "grad_norm": 5.871382713317871, "learning_rate": 4.694182199628823e-05, "loss": 0.0491, "step": 4270 }, { "epoch": 3.11, "grad_norm": 8.34383773803711, "learning_rate": 4.692164931816348e-05, "loss": 0.0193, "step": 4280 }, { "epoch": 3.11, "grad_norm": 21.052440643310547, "learning_rate": 4.6901476640038735e-05, "loss": 0.0225, "step": 4290 }, { "epoch": 3.12, "grad_norm": 27.46809959411621, "learning_rate": 4.688130396191399e-05, "loss": 0.0557, "step": 4300 }, { "epoch": 3.13, "grad_norm": 0.6610531806945801, "learning_rate": 4.686113128378924e-05, "loss": 0.0365, "step": 4310 }, { "epoch": 3.14, "grad_norm": 0.41435083746910095, "learning_rate": 4.684095860566449e-05, "loss": 0.0202, "step": 4320 }, { "epoch": 3.14, "grad_norm": 10.013864517211914, "learning_rate": 4.682078592753974e-05, "loss": 0.049, "step": 4330 }, { "epoch": 3.15, "grad_norm": 22.101505279541016, "learning_rate": 4.680061324941499e-05, "loss": 0.0436, "step": 4340 }, { "epoch": 3.16, "grad_norm": 6.700098037719727, "learning_rate": 4.678044057129025e-05, "loss": 0.1032, "step": 4350 }, { "epoch": 3.17, "grad_norm": 8.122451782226562, "learning_rate": 4.67602678931655e-05, "loss": 0.049, "step": 4360 }, { "epoch": 3.17, "grad_norm": 1.0085687637329102, "learning_rate": 4.674009521504075e-05, "loss": 0.0309, "step": 4370 }, { "epoch": 3.18, "grad_norm": 0.43637529015541077, "learning_rate": 4.6719922536916004e-05, "loss": 0.0359, "step": 4380 }, { "epoch": 3.19, "grad_norm": 6.261972904205322, "learning_rate": 4.669974985879125e-05, "loss": 0.0514, "step": 4390 }, { "epoch": 3.19, "grad_norm": 0.32403895258903503, "learning_rate": 4.6679577180666507e-05, "loss": 0.0513, "step": 4400 }, { "epoch": 3.2, "grad_norm": 3.918548583984375, "learning_rate": 4.665940450254176e-05, "loss": 0.0566, "step": 4410 }, { "epoch": 3.21, "grad_norm": 17.437055587768555, "learning_rate": 4.6639231824417016e-05, "loss": 0.052, "step": 4420 }, { "epoch": 3.22, "grad_norm": 0.20225393772125244, "learning_rate": 4.661905914629226e-05, "loss": 0.0375, "step": 4430 }, { "epoch": 3.22, "grad_norm": 15.23760986328125, "learning_rate": 4.659888646816752e-05, "loss": 0.0346, "step": 4440 }, { "epoch": 3.23, "grad_norm": 0.9687026143074036, "learning_rate": 4.6578713790042765e-05, "loss": 0.0222, "step": 4450 }, { "epoch": 3.24, "grad_norm": 1.4425278902053833, "learning_rate": 4.655854111191802e-05, "loss": 0.0376, "step": 4460 }, { "epoch": 3.25, "grad_norm": 7.644813537597656, "learning_rate": 4.6538368433793274e-05, "loss": 0.034, "step": 4470 }, { "epoch": 3.25, "grad_norm": 9.407659530639648, "learning_rate": 4.651819575566853e-05, "loss": 0.0344, "step": 4480 }, { "epoch": 3.26, "grad_norm": 9.224335670471191, "learning_rate": 4.6498023077543776e-05, "loss": 0.0337, "step": 4490 }, { "epoch": 3.27, "grad_norm": 19.596595764160156, "learning_rate": 4.647785039941903e-05, "loss": 0.0391, "step": 4500 }, { "epoch": 3.27, "grad_norm": 14.081114768981934, "learning_rate": 4.645767772129428e-05, "loss": 0.0267, "step": 4510 }, { "epoch": 3.28, "grad_norm": 0.47743141651153564, "learning_rate": 4.643750504316953e-05, "loss": 0.0472, "step": 4520 }, { "epoch": 3.29, "grad_norm": 0.03773049637675285, "learning_rate": 4.641733236504479e-05, "loss": 0.0164, "step": 4530 }, { "epoch": 3.3, "grad_norm": 0.055120404809713364, "learning_rate": 4.639715968692004e-05, "loss": 0.0395, "step": 4540 }, { "epoch": 3.3, "grad_norm": 14.224281311035156, "learning_rate": 4.637698700879529e-05, "loss": 0.0258, "step": 4550 }, { "epoch": 3.31, "grad_norm": 19.072011947631836, "learning_rate": 4.635681433067054e-05, "loss": 0.0379, "step": 4560 }, { "epoch": 3.32, "grad_norm": 0.13876385986804962, "learning_rate": 4.633664165254579e-05, "loss": 0.0334, "step": 4570 }, { "epoch": 3.32, "grad_norm": 2.5720953941345215, "learning_rate": 4.6316468974421046e-05, "loss": 0.0297, "step": 4580 }, { "epoch": 3.33, "grad_norm": 5.991764068603516, "learning_rate": 4.62962962962963e-05, "loss": 0.0555, "step": 4590 }, { "epoch": 3.34, "grad_norm": 5.875485897064209, "learning_rate": 4.627612361817155e-05, "loss": 0.0323, "step": 4600 }, { "epoch": 3.35, "grad_norm": 3.093625783920288, "learning_rate": 4.62559509400468e-05, "loss": 0.0449, "step": 4610 }, { "epoch": 3.35, "grad_norm": 0.6352009773254395, "learning_rate": 4.623577826192205e-05, "loss": 0.0327, "step": 4620 }, { "epoch": 3.36, "grad_norm": 0.2224309891462326, "learning_rate": 4.6215605583797305e-05, "loss": 0.0566, "step": 4630 }, { "epoch": 3.37, "grad_norm": 3.2887914180755615, "learning_rate": 4.619543290567256e-05, "loss": 0.0494, "step": 4640 }, { "epoch": 3.38, "grad_norm": 5.372390270233154, "learning_rate": 4.6175260227547814e-05, "loss": 0.034, "step": 4650 }, { "epoch": 3.38, "grad_norm": 5.275757312774658, "learning_rate": 4.615508754942306e-05, "loss": 0.0348, "step": 4660 }, { "epoch": 3.39, "grad_norm": 3.769078016281128, "learning_rate": 4.6134914871298316e-05, "loss": 0.025, "step": 4670 }, { "epoch": 3.4, "grad_norm": 6.985911846160889, "learning_rate": 4.6114742193173564e-05, "loss": 0.0481, "step": 4680 }, { "epoch": 3.4, "grad_norm": 3.3573648929595947, "learning_rate": 4.609456951504882e-05, "loss": 0.0421, "step": 4690 }, { "epoch": 3.41, "grad_norm": 0.22904418408870697, "learning_rate": 4.607439683692407e-05, "loss": 0.0155, "step": 4700 }, { "epoch": 3.42, "grad_norm": 1.3723245859146118, "learning_rate": 4.605422415879933e-05, "loss": 0.0151, "step": 4710 }, { "epoch": 3.43, "grad_norm": 2.954483985900879, "learning_rate": 4.6034051480674575e-05, "loss": 0.0665, "step": 4720 }, { "epoch": 3.43, "grad_norm": 24.531171798706055, "learning_rate": 4.601387880254983e-05, "loss": 0.06, "step": 4730 }, { "epoch": 3.44, "grad_norm": 17.55864143371582, "learning_rate": 4.599370612442508e-05, "loss": 0.05, "step": 4740 }, { "epoch": 3.45, "grad_norm": 0.5369663238525391, "learning_rate": 4.597353344630033e-05, "loss": 0.0139, "step": 4750 }, { "epoch": 3.46, "grad_norm": 2.363798141479492, "learning_rate": 4.5953360768175586e-05, "loss": 0.013, "step": 4760 }, { "epoch": 3.46, "grad_norm": 6.0249714851379395, "learning_rate": 4.593318809005084e-05, "loss": 0.066, "step": 4770 }, { "epoch": 3.47, "grad_norm": 11.2283353805542, "learning_rate": 4.591301541192609e-05, "loss": 0.0614, "step": 4780 }, { "epoch": 3.48, "grad_norm": 12.915953636169434, "learning_rate": 4.5892842733801336e-05, "loss": 0.0251, "step": 4790 }, { "epoch": 3.48, "grad_norm": 2.8560121059417725, "learning_rate": 4.587267005567659e-05, "loss": 0.0239, "step": 4800 }, { "epoch": 3.49, "grad_norm": 14.936617851257324, "learning_rate": 4.5852497377551845e-05, "loss": 0.0343, "step": 4810 }, { "epoch": 3.5, "grad_norm": 0.29327040910720825, "learning_rate": 4.58323246994271e-05, "loss": 0.0474, "step": 4820 }, { "epoch": 3.51, "grad_norm": 2.6680634021759033, "learning_rate": 4.5812152021302354e-05, "loss": 0.0111, "step": 4830 }, { "epoch": 3.51, "grad_norm": 0.033980220556259155, "learning_rate": 4.57919793431776e-05, "loss": 0.0652, "step": 4840 }, { "epoch": 3.52, "grad_norm": 0.2327333241701126, "learning_rate": 4.577180666505285e-05, "loss": 0.0188, "step": 4850 }, { "epoch": 3.53, "grad_norm": 3.055844783782959, "learning_rate": 4.5751633986928104e-05, "loss": 0.0408, "step": 4860 }, { "epoch": 3.54, "grad_norm": 4.960373878479004, "learning_rate": 4.573146130880336e-05, "loss": 0.0363, "step": 4870 }, { "epoch": 3.54, "grad_norm": 1.730433702468872, "learning_rate": 4.571128863067861e-05, "loss": 0.0296, "step": 4880 }, { "epoch": 3.55, "grad_norm": 12.168339729309082, "learning_rate": 4.569111595255387e-05, "loss": 0.0315, "step": 4890 }, { "epoch": 3.56, "grad_norm": 11.328636169433594, "learning_rate": 4.5670943274429115e-05, "loss": 0.014, "step": 4900 }, { "epoch": 3.56, "grad_norm": 0.4388793110847473, "learning_rate": 4.565077059630436e-05, "loss": 0.0374, "step": 4910 }, { "epoch": 3.57, "grad_norm": 3.212505578994751, "learning_rate": 4.563059791817962e-05, "loss": 0.0709, "step": 4920 }, { "epoch": 3.58, "grad_norm": 1.8563919067382812, "learning_rate": 4.561042524005487e-05, "loss": 0.0561, "step": 4930 }, { "epoch": 3.59, "grad_norm": 5.985075950622559, "learning_rate": 4.5590252561930126e-05, "loss": 0.0382, "step": 4940 }, { "epoch": 3.59, "grad_norm": 2.54897403717041, "learning_rate": 4.557007988380538e-05, "loss": 0.0399, "step": 4950 }, { "epoch": 3.6, "grad_norm": 0.6124072074890137, "learning_rate": 4.554990720568063e-05, "loss": 0.0231, "step": 4960 }, { "epoch": 3.61, "grad_norm": 7.537976264953613, "learning_rate": 4.5529734527555876e-05, "loss": 0.0276, "step": 4970 }, { "epoch": 3.62, "grad_norm": 0.04458412900567055, "learning_rate": 4.550956184943113e-05, "loss": 0.0548, "step": 4980 }, { "epoch": 3.62, "grad_norm": 3.02335524559021, "learning_rate": 4.5489389171306385e-05, "loss": 0.039, "step": 4990 }, { "epoch": 3.63, "grad_norm": 10.046403884887695, "learning_rate": 4.546921649318164e-05, "loss": 0.0427, "step": 5000 }, { "epoch": 3.64, "grad_norm": 3.8308396339416504, "learning_rate": 4.5449043815056893e-05, "loss": 0.0399, "step": 5010 }, { "epoch": 3.64, "grad_norm": 0.474543958902359, "learning_rate": 4.542887113693214e-05, "loss": 0.0349, "step": 5020 }, { "epoch": 3.65, "grad_norm": 0.38830775022506714, "learning_rate": 4.540869845880739e-05, "loss": 0.0271, "step": 5030 }, { "epoch": 3.66, "grad_norm": 6.250176906585693, "learning_rate": 4.538852578068264e-05, "loss": 0.0786, "step": 5040 }, { "epoch": 3.67, "grad_norm": 11.84267807006836, "learning_rate": 4.53683531025579e-05, "loss": 0.0187, "step": 5050 }, { "epoch": 3.67, "grad_norm": 2.9608876705169678, "learning_rate": 4.534818042443315e-05, "loss": 0.0289, "step": 5060 }, { "epoch": 3.68, "grad_norm": 20.82479476928711, "learning_rate": 4.532800774630841e-05, "loss": 0.0372, "step": 5070 }, { "epoch": 3.69, "grad_norm": 12.964320182800293, "learning_rate": 4.5307835068183654e-05, "loss": 0.0439, "step": 5080 }, { "epoch": 3.7, "grad_norm": 18.148500442504883, "learning_rate": 4.52876623900589e-05, "loss": 0.042, "step": 5090 }, { "epoch": 3.7, "grad_norm": 0.2496163696050644, "learning_rate": 4.5267489711934157e-05, "loss": 0.0292, "step": 5100 }, { "epoch": 3.71, "grad_norm": 1.652496576309204, "learning_rate": 4.524731703380941e-05, "loss": 0.0364, "step": 5110 }, { "epoch": 3.72, "grad_norm": 6.644486427307129, "learning_rate": 4.5227144355684665e-05, "loss": 0.0148, "step": 5120 }, { "epoch": 3.72, "grad_norm": 0.4298780858516693, "learning_rate": 4.520697167755992e-05, "loss": 0.0164, "step": 5130 }, { "epoch": 3.73, "grad_norm": 9.257946968078613, "learning_rate": 4.518679899943517e-05, "loss": 0.0382, "step": 5140 }, { "epoch": 3.74, "grad_norm": 13.765926361083984, "learning_rate": 4.5166626321310415e-05, "loss": 0.0378, "step": 5150 }, { "epoch": 3.75, "grad_norm": 5.054808139801025, "learning_rate": 4.514645364318567e-05, "loss": 0.0532, "step": 5160 }, { "epoch": 3.75, "grad_norm": 22.202678680419922, "learning_rate": 4.5126280965060924e-05, "loss": 0.0287, "step": 5170 }, { "epoch": 3.76, "grad_norm": 3.7927277088165283, "learning_rate": 4.510610828693618e-05, "loss": 0.0358, "step": 5180 }, { "epoch": 3.77, "grad_norm": 10.952911376953125, "learning_rate": 4.508593560881143e-05, "loss": 0.0544, "step": 5190 }, { "epoch": 3.77, "grad_norm": 0.1568138152360916, "learning_rate": 4.506576293068668e-05, "loss": 0.0357, "step": 5200 }, { "epoch": 3.78, "grad_norm": 3.809103012084961, "learning_rate": 4.504559025256193e-05, "loss": 0.0513, "step": 5210 }, { "epoch": 3.79, "grad_norm": 0.44535213708877563, "learning_rate": 4.502541757443718e-05, "loss": 0.028, "step": 5220 }, { "epoch": 3.8, "grad_norm": 3.141669511795044, "learning_rate": 4.500524489631244e-05, "loss": 0.0191, "step": 5230 }, { "epoch": 3.8, "grad_norm": 12.636493682861328, "learning_rate": 4.498507221818769e-05, "loss": 0.0366, "step": 5240 }, { "epoch": 3.81, "grad_norm": 0.021700365468859673, "learning_rate": 4.4964899540062946e-05, "loss": 0.0167, "step": 5250 }, { "epoch": 3.82, "grad_norm": 25.216238021850586, "learning_rate": 4.4944726861938194e-05, "loss": 0.0331, "step": 5260 }, { "epoch": 3.83, "grad_norm": 14.333684921264648, "learning_rate": 4.492455418381344e-05, "loss": 0.0093, "step": 5270 }, { "epoch": 3.83, "grad_norm": 2.936802625656128, "learning_rate": 4.4904381505688696e-05, "loss": 0.0569, "step": 5280 }, { "epoch": 3.84, "grad_norm": 0.08579454571008682, "learning_rate": 4.488420882756395e-05, "loss": 0.0667, "step": 5290 }, { "epoch": 3.85, "grad_norm": 0.8753920793533325, "learning_rate": 4.4864036149439205e-05, "loss": 0.0313, "step": 5300 }, { "epoch": 3.85, "grad_norm": 7.517170429229736, "learning_rate": 4.484386347131445e-05, "loss": 0.0258, "step": 5310 }, { "epoch": 3.86, "grad_norm": 7.475151538848877, "learning_rate": 4.482369079318971e-05, "loss": 0.0257, "step": 5320 }, { "epoch": 3.87, "grad_norm": 13.565512657165527, "learning_rate": 4.4803518115064955e-05, "loss": 0.0419, "step": 5330 }, { "epoch": 3.88, "grad_norm": 1.0435971021652222, "learning_rate": 4.478334543694021e-05, "loss": 0.0623, "step": 5340 }, { "epoch": 3.88, "grad_norm": 9.602534294128418, "learning_rate": 4.4763172758815464e-05, "loss": 0.0553, "step": 5350 }, { "epoch": 3.89, "grad_norm": 0.1525646597146988, "learning_rate": 4.474300008069072e-05, "loss": 0.0309, "step": 5360 }, { "epoch": 3.9, "grad_norm": 12.293474197387695, "learning_rate": 4.4722827402565966e-05, "loss": 0.0325, "step": 5370 }, { "epoch": 3.91, "grad_norm": 15.566313743591309, "learning_rate": 4.470265472444122e-05, "loss": 0.0398, "step": 5380 }, { "epoch": 3.91, "grad_norm": 22.519784927368164, "learning_rate": 4.468248204631647e-05, "loss": 0.0469, "step": 5390 }, { "epoch": 3.92, "grad_norm": 5.122323036193848, "learning_rate": 4.466230936819172e-05, "loss": 0.0284, "step": 5400 }, { "epoch": 3.93, "grad_norm": 2.0292279720306396, "learning_rate": 4.464213669006698e-05, "loss": 0.0123, "step": 5410 }, { "epoch": 3.93, "grad_norm": 0.47845131158828735, "learning_rate": 4.462196401194223e-05, "loss": 0.0277, "step": 5420 }, { "epoch": 3.94, "grad_norm": 0.20897838473320007, "learning_rate": 4.460179133381748e-05, "loss": 0.0237, "step": 5430 }, { "epoch": 3.95, "grad_norm": 5.986893177032471, "learning_rate": 4.4581618655692734e-05, "loss": 0.0292, "step": 5440 }, { "epoch": 3.96, "grad_norm": 7.410892009735107, "learning_rate": 4.456144597756798e-05, "loss": 0.026, "step": 5450 }, { "epoch": 3.96, "grad_norm": 23.17868423461914, "learning_rate": 4.4541273299443236e-05, "loss": 0.0494, "step": 5460 }, { "epoch": 3.97, "grad_norm": 16.8724308013916, "learning_rate": 4.452110062131849e-05, "loss": 0.0276, "step": 5470 }, { "epoch": 3.98, "grad_norm": 6.479537010192871, "learning_rate": 4.4500927943193745e-05, "loss": 0.0244, "step": 5480 }, { "epoch": 3.99, "grad_norm": 2.1582884788513184, "learning_rate": 4.448075526506899e-05, "loss": 0.0245, "step": 5490 }, { "epoch": 3.99, "grad_norm": 1.5921260118484497, "learning_rate": 4.446058258694425e-05, "loss": 0.0111, "step": 5500 }, { "epoch": 4.0, "grad_norm": 7.180184841156006, "learning_rate": 4.4440409908819495e-05, "loss": 0.0454, "step": 5510 }, { "epoch": 4.0, "eval_accuracy": 0.994963016744566, "eval_f1": 0.9883720930232558, "eval_loss": 0.013561672531068325, "eval_precision": 0.991696447340761, "eval_recall": 0.9850699519732721, "eval_roc_auc": 0.9998451949640312, "eval_runtime": 388.167, "eval_samples_per_second": 227.088, "eval_steps_per_second": 14.195, "step": 5510 }, { "epoch": 4.01, "grad_norm": 10.229007720947266, "learning_rate": 4.442023723069475e-05, "loss": 0.0535, "step": 5520 }, { "epoch": 4.01, "grad_norm": 0.41513580083847046, "learning_rate": 4.4400064552570004e-05, "loss": 0.0197, "step": 5530 }, { "epoch": 4.02, "grad_norm": 1.0428308248519897, "learning_rate": 4.437989187444525e-05, "loss": 0.0396, "step": 5540 }, { "epoch": 4.03, "grad_norm": 21.104991912841797, "learning_rate": 4.4359719196320506e-05, "loss": 0.0461, "step": 5550 }, { "epoch": 4.04, "grad_norm": 0.17535223066806793, "learning_rate": 4.433954651819576e-05, "loss": 0.0242, "step": 5560 }, { "epoch": 4.04, "grad_norm": 1.7396169900894165, "learning_rate": 4.431937384007101e-05, "loss": 0.0127, "step": 5570 }, { "epoch": 4.05, "grad_norm": 0.34911617636680603, "learning_rate": 4.429920116194626e-05, "loss": 0.0213, "step": 5580 }, { "epoch": 4.06, "grad_norm": 26.257558822631836, "learning_rate": 4.427902848382152e-05, "loss": 0.045, "step": 5590 }, { "epoch": 4.07, "grad_norm": 0.12217586487531662, "learning_rate": 4.4258855805696765e-05, "loss": 0.0103, "step": 5600 }, { "epoch": 4.07, "grad_norm": 0.008990990929305553, "learning_rate": 4.423868312757202e-05, "loss": 0.0157, "step": 5610 }, { "epoch": 4.08, "grad_norm": 0.0027994182892143726, "learning_rate": 4.4218510449447274e-05, "loss": 0.0884, "step": 5620 }, { "epoch": 4.09, "grad_norm": 0.013051895424723625, "learning_rate": 4.419833777132252e-05, "loss": 0.0135, "step": 5630 }, { "epoch": 4.09, "grad_norm": 9.522844314575195, "learning_rate": 4.4178165093197776e-05, "loss": 0.0197, "step": 5640 }, { "epoch": 4.1, "grad_norm": 3.2195699214935303, "learning_rate": 4.415799241507303e-05, "loss": 0.02, "step": 5650 }, { "epoch": 4.11, "grad_norm": 13.479093551635742, "learning_rate": 4.413781973694828e-05, "loss": 0.041, "step": 5660 }, { "epoch": 4.12, "grad_norm": 16.785228729248047, "learning_rate": 4.411764705882353e-05, "loss": 0.0327, "step": 5670 }, { "epoch": 4.12, "grad_norm": 17.039676666259766, "learning_rate": 4.409747438069878e-05, "loss": 0.0227, "step": 5680 }, { "epoch": 4.13, "grad_norm": 14.65450668334961, "learning_rate": 4.4077301702574034e-05, "loss": 0.0299, "step": 5690 }, { "epoch": 4.14, "grad_norm": 12.701321601867676, "learning_rate": 4.405712902444929e-05, "loss": 0.0146, "step": 5700 }, { "epoch": 4.15, "grad_norm": 5.022671699523926, "learning_rate": 4.4036956346324543e-05, "loss": 0.01, "step": 5710 }, { "epoch": 4.15, "grad_norm": 0.2834198474884033, "learning_rate": 4.401678366819979e-05, "loss": 0.0465, "step": 5720 }, { "epoch": 4.16, "grad_norm": 3.1493167877197266, "learning_rate": 4.3996610990075046e-05, "loss": 0.0307, "step": 5730 }, { "epoch": 4.17, "grad_norm": 0.09327320009469986, "learning_rate": 4.397643831195029e-05, "loss": 0.0339, "step": 5740 }, { "epoch": 4.17, "grad_norm": 12.125381469726562, "learning_rate": 4.395626563382555e-05, "loss": 0.0245, "step": 5750 }, { "epoch": 4.18, "grad_norm": 12.335347175598145, "learning_rate": 4.39360929557008e-05, "loss": 0.0333, "step": 5760 }, { "epoch": 4.19, "grad_norm": 0.17939302325248718, "learning_rate": 4.391592027757606e-05, "loss": 0.0147, "step": 5770 }, { "epoch": 4.2, "grad_norm": 2.4980413913726807, "learning_rate": 4.3895747599451304e-05, "loss": 0.0392, "step": 5780 }, { "epoch": 4.2, "grad_norm": 3.9628798961639404, "learning_rate": 4.387557492132656e-05, "loss": 0.0202, "step": 5790 }, { "epoch": 4.21, "grad_norm": 14.232178688049316, "learning_rate": 4.3855402243201806e-05, "loss": 0.0274, "step": 5800 }, { "epoch": 4.22, "grad_norm": 3.835378646850586, "learning_rate": 4.383522956507706e-05, "loss": 0.0185, "step": 5810 }, { "epoch": 4.23, "grad_norm": 3.1973910331726074, "learning_rate": 4.3815056886952315e-05, "loss": 0.0209, "step": 5820 }, { "epoch": 4.23, "grad_norm": 10.52609634399414, "learning_rate": 4.379488420882756e-05, "loss": 0.0053, "step": 5830 }, { "epoch": 4.24, "grad_norm": 0.010969222523272038, "learning_rate": 4.377471153070282e-05, "loss": 0.0307, "step": 5840 }, { "epoch": 4.25, "grad_norm": 1.1569510698318481, "learning_rate": 4.375453885257807e-05, "loss": 0.0202, "step": 5850 }, { "epoch": 4.25, "grad_norm": 11.772544860839844, "learning_rate": 4.373436617445332e-05, "loss": 0.0179, "step": 5860 }, { "epoch": 4.26, "grad_norm": 29.545024871826172, "learning_rate": 4.3714193496328574e-05, "loss": 0.0177, "step": 5870 }, { "epoch": 4.27, "grad_norm": 0.005079995840787888, "learning_rate": 4.369402081820383e-05, "loss": 0.0288, "step": 5880 }, { "epoch": 4.28, "grad_norm": 0.520383894443512, "learning_rate": 4.3673848140079076e-05, "loss": 0.0416, "step": 5890 }, { "epoch": 4.28, "grad_norm": 4.170873165130615, "learning_rate": 4.365367546195433e-05, "loss": 0.0407, "step": 5900 }, { "epoch": 4.29, "grad_norm": 0.9381336569786072, "learning_rate": 4.3633502783829585e-05, "loss": 0.0184, "step": 5910 }, { "epoch": 4.3, "grad_norm": 7.8422064781188965, "learning_rate": 4.361333010570483e-05, "loss": 0.0224, "step": 5920 }, { "epoch": 4.3, "grad_norm": 0.09656038135290146, "learning_rate": 4.359315742758009e-05, "loss": 0.0224, "step": 5930 }, { "epoch": 4.31, "grad_norm": 9.255690574645996, "learning_rate": 4.357298474945534e-05, "loss": 0.0532, "step": 5940 }, { "epoch": 4.32, "grad_norm": 9.539785385131836, "learning_rate": 4.355281207133059e-05, "loss": 0.0523, "step": 5950 }, { "epoch": 4.33, "grad_norm": 0.2933836877346039, "learning_rate": 4.3532639393205844e-05, "loss": 0.0402, "step": 5960 }, { "epoch": 4.33, "grad_norm": 0.00884042214602232, "learning_rate": 4.35124667150811e-05, "loss": 0.0275, "step": 5970 }, { "epoch": 4.34, "grad_norm": 7.750953197479248, "learning_rate": 4.3492294036956346e-05, "loss": 0.0352, "step": 5980 }, { "epoch": 4.35, "grad_norm": 0.5286880135536194, "learning_rate": 4.34721213588316e-05, "loss": 0.0123, "step": 5990 }, { "epoch": 4.36, "grad_norm": 2.974754571914673, "learning_rate": 4.3451948680706855e-05, "loss": 0.0184, "step": 6000 }, { "epoch": 4.36, "grad_norm": 20.916255950927734, "learning_rate": 4.34317760025821e-05, "loss": 0.0159, "step": 6010 }, { "epoch": 4.37, "grad_norm": 10.342968940734863, "learning_rate": 4.341160332445736e-05, "loss": 0.0129, "step": 6020 }, { "epoch": 4.38, "grad_norm": 4.216011047363281, "learning_rate": 4.339143064633261e-05, "loss": 0.0285, "step": 6030 }, { "epoch": 4.38, "grad_norm": 16.87735366821289, "learning_rate": 4.337125796820786e-05, "loss": 0.0134, "step": 6040 }, { "epoch": 4.39, "grad_norm": 0.019013158977031708, "learning_rate": 4.3351085290083114e-05, "loss": 0.012, "step": 6050 }, { "epoch": 4.4, "grad_norm": 12.512757301330566, "learning_rate": 4.333091261195836e-05, "loss": 0.0481, "step": 6060 }, { "epoch": 4.41, "grad_norm": 8.553503036499023, "learning_rate": 4.3310739933833616e-05, "loss": 0.0139, "step": 6070 }, { "epoch": 4.41, "grad_norm": 14.92082405090332, "learning_rate": 4.329056725570887e-05, "loss": 0.0212, "step": 6080 }, { "epoch": 4.42, "grad_norm": 0.0787595734000206, "learning_rate": 4.3270394577584125e-05, "loss": 0.0398, "step": 6090 }, { "epoch": 4.43, "grad_norm": 0.6032419204711914, "learning_rate": 4.325022189945937e-05, "loss": 0.011, "step": 6100 }, { "epoch": 4.44, "grad_norm": 0.5409778356552124, "learning_rate": 4.323004922133463e-05, "loss": 0.0045, "step": 6110 }, { "epoch": 4.44, "grad_norm": 3.50061297416687, "learning_rate": 4.3209876543209875e-05, "loss": 0.0704, "step": 6120 }, { "epoch": 4.45, "grad_norm": 0.033528584986925125, "learning_rate": 4.318970386508513e-05, "loss": 0.0346, "step": 6130 }, { "epoch": 4.46, "grad_norm": 13.443081855773926, "learning_rate": 4.3169531186960384e-05, "loss": 0.0312, "step": 6140 }, { "epoch": 4.46, "grad_norm": 4.95137357711792, "learning_rate": 4.314935850883564e-05, "loss": 0.0301, "step": 6150 }, { "epoch": 4.47, "grad_norm": 3.4258296489715576, "learning_rate": 4.3129185830710886e-05, "loss": 0.0081, "step": 6160 }, { "epoch": 4.48, "grad_norm": 7.301894664764404, "learning_rate": 4.310901315258614e-05, "loss": 0.0255, "step": 6170 }, { "epoch": 4.49, "grad_norm": 21.892892837524414, "learning_rate": 4.308884047446139e-05, "loss": 0.0221, "step": 6180 }, { "epoch": 4.49, "grad_norm": 0.33506104350090027, "learning_rate": 4.306866779633664e-05, "loss": 0.0246, "step": 6190 }, { "epoch": 4.5, "grad_norm": 0.23564085364341736, "learning_rate": 4.30484951182119e-05, "loss": 0.0479, "step": 6200 }, { "epoch": 4.51, "grad_norm": 2.225267171859741, "learning_rate": 4.302832244008715e-05, "loss": 0.032, "step": 6210 }, { "epoch": 4.52, "grad_norm": 6.463624477386475, "learning_rate": 4.30081497619624e-05, "loss": 0.0219, "step": 6220 }, { "epoch": 4.52, "grad_norm": 0.11097526550292969, "learning_rate": 4.2987977083837654e-05, "loss": 0.0368, "step": 6230 }, { "epoch": 4.53, "grad_norm": 0.5188978910446167, "learning_rate": 4.29678044057129e-05, "loss": 0.0233, "step": 6240 }, { "epoch": 4.54, "grad_norm": 0.026288433000445366, "learning_rate": 4.2947631727588156e-05, "loss": 0.0075, "step": 6250 }, { "epoch": 4.54, "grad_norm": 0.03321617841720581, "learning_rate": 4.292745904946341e-05, "loss": 0.0423, "step": 6260 }, { "epoch": 4.55, "grad_norm": 6.4352874755859375, "learning_rate": 4.2907286371338665e-05, "loss": 0.0567, "step": 6270 }, { "epoch": 4.56, "grad_norm": 0.2122620791196823, "learning_rate": 4.288711369321391e-05, "loss": 0.048, "step": 6280 }, { "epoch": 4.57, "grad_norm": 1.9611496925354004, "learning_rate": 4.286694101508916e-05, "loss": 0.0357, "step": 6290 }, { "epoch": 4.57, "grad_norm": 14.888888359069824, "learning_rate": 4.2846768336964415e-05, "loss": 0.0133, "step": 6300 }, { "epoch": 4.58, "grad_norm": 3.209965705871582, "learning_rate": 4.282659565883967e-05, "loss": 0.0131, "step": 6310 }, { "epoch": 4.59, "grad_norm": 0.4727603495121002, "learning_rate": 4.2806422980714924e-05, "loss": 0.0388, "step": 6320 }, { "epoch": 4.6, "grad_norm": 20.515947341918945, "learning_rate": 4.278625030259018e-05, "loss": 0.0232, "step": 6330 }, { "epoch": 4.6, "grad_norm": 13.02455997467041, "learning_rate": 4.2766077624465426e-05, "loss": 0.0145, "step": 6340 }, { "epoch": 4.61, "grad_norm": 0.28295955061912537, "learning_rate": 4.274590494634067e-05, "loss": 0.0451, "step": 6350 }, { "epoch": 4.62, "grad_norm": 1.2605043649673462, "learning_rate": 4.272573226821593e-05, "loss": 0.0298, "step": 6360 }, { "epoch": 4.62, "grad_norm": 0.013523263856768608, "learning_rate": 4.270555959009118e-05, "loss": 0.0186, "step": 6370 }, { "epoch": 4.63, "grad_norm": 0.15178602933883667, "learning_rate": 4.268538691196644e-05, "loss": 0.0068, "step": 6380 }, { "epoch": 4.64, "grad_norm": 0.569879949092865, "learning_rate": 4.266521423384169e-05, "loss": 0.0259, "step": 6390 }, { "epoch": 4.65, "grad_norm": 0.3333960771560669, "learning_rate": 4.264504155571694e-05, "loss": 0.0791, "step": 6400 }, { "epoch": 4.65, "grad_norm": 0.40962207317352295, "learning_rate": 4.2624868877592187e-05, "loss": 0.0185, "step": 6410 }, { "epoch": 4.66, "grad_norm": 0.841285228729248, "learning_rate": 4.260469619946744e-05, "loss": 0.0585, "step": 6420 }, { "epoch": 4.67, "grad_norm": 0.23981308937072754, "learning_rate": 4.2584523521342696e-05, "loss": 0.0268, "step": 6430 }, { "epoch": 4.68, "grad_norm": 0.7248146533966064, "learning_rate": 4.256435084321795e-05, "loss": 0.0131, "step": 6440 }, { "epoch": 4.68, "grad_norm": 29.936283111572266, "learning_rate": 4.2544178165093204e-05, "loss": 0.0247, "step": 6450 }, { "epoch": 4.69, "grad_norm": 15.446109771728516, "learning_rate": 4.252400548696845e-05, "loss": 0.044, "step": 6460 }, { "epoch": 4.7, "grad_norm": 17.369585037231445, "learning_rate": 4.25038328088437e-05, "loss": 0.0611, "step": 6470 }, { "epoch": 4.7, "grad_norm": 12.479216575622559, "learning_rate": 4.2483660130718954e-05, "loss": 0.0482, "step": 6480 }, { "epoch": 4.71, "grad_norm": 7.170633316040039, "learning_rate": 4.246348745259421e-05, "loss": 0.0416, "step": 6490 }, { "epoch": 4.72, "grad_norm": 0.019940435886383057, "learning_rate": 4.244331477446946e-05, "loss": 0.0083, "step": 6500 }, { "epoch": 4.73, "grad_norm": 15.792586326599121, "learning_rate": 4.242314209634472e-05, "loss": 0.0235, "step": 6510 }, { "epoch": 4.73, "grad_norm": 0.1848461925983429, "learning_rate": 4.2402969418219965e-05, "loss": 0.05, "step": 6520 }, { "epoch": 4.74, "grad_norm": 9.763023376464844, "learning_rate": 4.238279674009521e-05, "loss": 0.0368, "step": 6530 }, { "epoch": 4.75, "grad_norm": 0.3894762396812439, "learning_rate": 4.236262406197047e-05, "loss": 0.0259, "step": 6540 }, { "epoch": 4.75, "grad_norm": 0.15589603781700134, "learning_rate": 4.234245138384572e-05, "loss": 0.0214, "step": 6550 }, { "epoch": 4.76, "grad_norm": 10.842832565307617, "learning_rate": 4.2322278705720976e-05, "loss": 0.029, "step": 6560 }, { "epoch": 4.77, "grad_norm": 0.747986376285553, "learning_rate": 4.230210602759623e-05, "loss": 0.0166, "step": 6570 }, { "epoch": 4.78, "grad_norm": 6.645331382751465, "learning_rate": 4.228193334947148e-05, "loss": 0.0122, "step": 6580 }, { "epoch": 4.78, "grad_norm": 0.048543334007263184, "learning_rate": 4.2261760671346726e-05, "loss": 0.0219, "step": 6590 }, { "epoch": 4.79, "grad_norm": 3.219879627227783, "learning_rate": 4.224158799322198e-05, "loss": 0.0327, "step": 6600 }, { "epoch": 4.8, "grad_norm": 0.23735283315181732, "learning_rate": 4.2221415315097235e-05, "loss": 0.0462, "step": 6610 }, { "epoch": 4.81, "grad_norm": 0.4189997911453247, "learning_rate": 4.220124263697249e-05, "loss": 0.0067, "step": 6620 }, { "epoch": 4.81, "grad_norm": 15.188400268554688, "learning_rate": 4.2181069958847744e-05, "loss": 0.0176, "step": 6630 }, { "epoch": 4.82, "grad_norm": 22.570255279541016, "learning_rate": 4.216089728072299e-05, "loss": 0.0241, "step": 6640 }, { "epoch": 4.83, "grad_norm": 18.151098251342773, "learning_rate": 4.214072460259824e-05, "loss": 0.0832, "step": 6650 }, { "epoch": 4.83, "grad_norm": 8.57191276550293, "learning_rate": 4.2120551924473494e-05, "loss": 0.0193, "step": 6660 }, { "epoch": 4.84, "grad_norm": 9.605646133422852, "learning_rate": 4.210037924634875e-05, "loss": 0.0168, "step": 6670 }, { "epoch": 4.85, "grad_norm": 2.7312123775482178, "learning_rate": 4.2080206568224e-05, "loss": 0.0612, "step": 6680 }, { "epoch": 4.86, "grad_norm": 1.0449771881103516, "learning_rate": 4.206003389009926e-05, "loss": 0.0238, "step": 6690 }, { "epoch": 4.86, "grad_norm": 14.31949234008789, "learning_rate": 4.20398612119745e-05, "loss": 0.0185, "step": 6700 }, { "epoch": 4.87, "grad_norm": 0.09144321084022522, "learning_rate": 4.201968853384975e-05, "loss": 0.0382, "step": 6710 }, { "epoch": 4.88, "grad_norm": 2.1980929374694824, "learning_rate": 4.199951585572501e-05, "loss": 0.0169, "step": 6720 }, { "epoch": 4.89, "grad_norm": 3.9242584705352783, "learning_rate": 4.197934317760026e-05, "loss": 0.0117, "step": 6730 }, { "epoch": 4.89, "grad_norm": 0.009010029025375843, "learning_rate": 4.1959170499475516e-05, "loss": 0.027, "step": 6740 }, { "epoch": 4.9, "grad_norm": 0.02251308038830757, "learning_rate": 4.193899782135077e-05, "loss": 0.036, "step": 6750 }, { "epoch": 4.91, "grad_norm": 0.2532757520675659, "learning_rate": 4.191882514322601e-05, "loss": 0.0098, "step": 6760 }, { "epoch": 4.91, "grad_norm": 15.302732467651367, "learning_rate": 4.1898652465101266e-05, "loss": 0.0105, "step": 6770 }, { "epoch": 4.92, "grad_norm": 9.087559700012207, "learning_rate": 4.187847978697652e-05, "loss": 0.0313, "step": 6780 }, { "epoch": 4.93, "grad_norm": 0.8211730718612671, "learning_rate": 4.1858307108851775e-05, "loss": 0.0581, "step": 6790 }, { "epoch": 4.94, "grad_norm": 0.49661290645599365, "learning_rate": 4.183813443072703e-05, "loss": 0.0336, "step": 6800 }, { "epoch": 4.94, "grad_norm": 8.808603286743164, "learning_rate": 4.181796175260228e-05, "loss": 0.0216, "step": 6810 }, { "epoch": 4.95, "grad_norm": 7.822020530700684, "learning_rate": 4.1797789074477525e-05, "loss": 0.0087, "step": 6820 }, { "epoch": 4.96, "grad_norm": 7.684726715087891, "learning_rate": 4.177761639635278e-05, "loss": 0.0207, "step": 6830 }, { "epoch": 4.97, "grad_norm": 29.289989471435547, "learning_rate": 4.1757443718228034e-05, "loss": 0.0635, "step": 6840 }, { "epoch": 4.97, "grad_norm": 1.1796934604644775, "learning_rate": 4.173727104010329e-05, "loss": 0.0063, "step": 6850 }, { "epoch": 4.98, "grad_norm": 4.763782024383545, "learning_rate": 4.171709836197854e-05, "loss": 0.0167, "step": 6860 }, { "epoch": 4.99, "grad_norm": 14.398111343383789, "learning_rate": 4.169692568385379e-05, "loss": 0.0234, "step": 6870 }, { "epoch": 4.99, "grad_norm": 9.24953842163086, "learning_rate": 4.167675300572904e-05, "loss": 0.0302, "step": 6880 }, { "epoch": 5.0, "eval_accuracy": 0.997220583564006, "eval_f1": 0.993579159787195, "eval_loss": 0.007504656910896301, "eval_precision": 0.9976317035945477, "eval_recall": 0.9895594069743161, "eval_roc_auc": 0.9999509363858444, "eval_runtime": 386.139, "eval_samples_per_second": 228.28, "eval_steps_per_second": 14.269, "step": 6887 }, { "epoch": 5.0, "grad_norm": 0.4994419515132904, "learning_rate": 4.165658032760429e-05, "loss": 0.0273, "step": 6890 }, { "epoch": 5.01, "grad_norm": 0.043419767171144485, "learning_rate": 4.163640764947955e-05, "loss": 0.0096, "step": 6900 }, { "epoch": 5.02, "grad_norm": 0.07488001883029938, "learning_rate": 4.16162349713548e-05, "loss": 0.0067, "step": 6910 }, { "epoch": 5.02, "grad_norm": 15.292566299438477, "learning_rate": 4.1596062293230056e-05, "loss": 0.0277, "step": 6920 }, { "epoch": 5.03, "grad_norm": 1.0951852798461914, "learning_rate": 4.1575889615105304e-05, "loss": 0.0054, "step": 6930 }, { "epoch": 5.04, "grad_norm": 11.888832092285156, "learning_rate": 4.155571693698055e-05, "loss": 0.0168, "step": 6940 }, { "epoch": 5.05, "grad_norm": 0.0656825602054596, "learning_rate": 4.1535544258855806e-05, "loss": 0.0123, "step": 6950 }, { "epoch": 5.05, "grad_norm": 22.420522689819336, "learning_rate": 4.151537158073106e-05, "loss": 0.0053, "step": 6960 }, { "epoch": 5.06, "grad_norm": 1.976753830909729, "learning_rate": 4.1495198902606315e-05, "loss": 0.0173, "step": 6970 }, { "epoch": 5.07, "grad_norm": 2.0210866928100586, "learning_rate": 4.147502622448157e-05, "loss": 0.0098, "step": 6980 }, { "epoch": 5.07, "grad_norm": 1.1973435878753662, "learning_rate": 4.145485354635682e-05, "loss": 0.054, "step": 6990 }, { "epoch": 5.08, "grad_norm": 0.7293339371681213, "learning_rate": 4.1434680868232065e-05, "loss": 0.0236, "step": 7000 }, { "epoch": 5.09, "grad_norm": 0.39437663555145264, "learning_rate": 4.141450819010732e-05, "loss": 0.0212, "step": 7010 }, { "epoch": 5.1, "grad_norm": 0.5972071290016174, "learning_rate": 4.1394335511982573e-05, "loss": 0.0189, "step": 7020 }, { "epoch": 5.1, "grad_norm": 1.5615042448043823, "learning_rate": 4.137416283385783e-05, "loss": 0.0195, "step": 7030 }, { "epoch": 5.11, "grad_norm": 0.0803157240152359, "learning_rate": 4.1353990155733076e-05, "loss": 0.0216, "step": 7040 }, { "epoch": 5.12, "grad_norm": 0.17535190284252167, "learning_rate": 4.133381747760833e-05, "loss": 0.0232, "step": 7050 }, { "epoch": 5.13, "grad_norm": 6.3075385093688965, "learning_rate": 4.131364479948358e-05, "loss": 0.0261, "step": 7060 }, { "epoch": 5.13, "grad_norm": 0.352460652589798, "learning_rate": 4.129347212135883e-05, "loss": 0.0184, "step": 7070 }, { "epoch": 5.14, "grad_norm": 0.83339524269104, "learning_rate": 4.127329944323409e-05, "loss": 0.019, "step": 7080 }, { "epoch": 5.15, "grad_norm": 0.037650395184755325, "learning_rate": 4.125312676510934e-05, "loss": 0.0139, "step": 7090 }, { "epoch": 5.15, "grad_norm": 0.45141324400901794, "learning_rate": 4.123295408698459e-05, "loss": 0.0224, "step": 7100 }, { "epoch": 5.16, "grad_norm": 4.516534805297852, "learning_rate": 4.121278140885984e-05, "loss": 0.0212, "step": 7110 }, { "epoch": 5.17, "grad_norm": 3.1175129413604736, "learning_rate": 4.119260873073509e-05, "loss": 0.0171, "step": 7120 }, { "epoch": 5.18, "grad_norm": 0.03533313795924187, "learning_rate": 4.1172436052610345e-05, "loss": 0.0136, "step": 7130 }, { "epoch": 5.18, "grad_norm": 0.273041307926178, "learning_rate": 4.11522633744856e-05, "loss": 0.0452, "step": 7140 }, { "epoch": 5.19, "grad_norm": 0.5384491086006165, "learning_rate": 4.1132090696360854e-05, "loss": 0.0313, "step": 7150 }, { "epoch": 5.2, "grad_norm": 0.5866811871528625, "learning_rate": 4.11119180182361e-05, "loss": 0.0235, "step": 7160 }, { "epoch": 5.21, "grad_norm": 14.287243843078613, "learning_rate": 4.1091745340111357e-05, "loss": 0.0128, "step": 7170 }, { "epoch": 5.21, "grad_norm": 13.50960636138916, "learning_rate": 4.1071572661986604e-05, "loss": 0.0175, "step": 7180 }, { "epoch": 5.22, "grad_norm": 13.826930046081543, "learning_rate": 4.105139998386186e-05, "loss": 0.0236, "step": 7190 }, { "epoch": 5.23, "grad_norm": 6.079420566558838, "learning_rate": 4.103122730573711e-05, "loss": 0.0313, "step": 7200 }, { "epoch": 5.23, "grad_norm": 20.14948272705078, "learning_rate": 4.101105462761237e-05, "loss": 0.0289, "step": 7210 }, { "epoch": 5.24, "grad_norm": 10.299389839172363, "learning_rate": 4.0990881949487615e-05, "loss": 0.0041, "step": 7220 }, { "epoch": 5.25, "grad_norm": 0.27182748913764954, "learning_rate": 4.097070927136287e-05, "loss": 0.0194, "step": 7230 }, { "epoch": 5.26, "grad_norm": 17.97000503540039, "learning_rate": 4.095053659323812e-05, "loss": 0.0186, "step": 7240 }, { "epoch": 5.26, "grad_norm": 11.942718505859375, "learning_rate": 4.093036391511337e-05, "loss": 0.012, "step": 7250 }, { "epoch": 5.27, "grad_norm": 17.377195358276367, "learning_rate": 4.0910191236988626e-05, "loss": 0.0375, "step": 7260 }, { "epoch": 5.28, "grad_norm": 0.3844599425792694, "learning_rate": 4.0890018558863874e-05, "loss": 0.0142, "step": 7270 }, { "epoch": 5.28, "grad_norm": 0.04491892457008362, "learning_rate": 4.086984588073913e-05, "loss": 0.0246, "step": 7280 }, { "epoch": 5.29, "grad_norm": 0.2385578751564026, "learning_rate": 4.084967320261438e-05, "loss": 0.0163, "step": 7290 }, { "epoch": 5.3, "grad_norm": 15.862750053405762, "learning_rate": 4.082950052448963e-05, "loss": 0.0127, "step": 7300 }, { "epoch": 5.31, "grad_norm": 0.6464446783065796, "learning_rate": 4.0809327846364885e-05, "loss": 0.0272, "step": 7310 }, { "epoch": 5.31, "grad_norm": 0.06587512791156769, "learning_rate": 4.078915516824014e-05, "loss": 0.0165, "step": 7320 }, { "epoch": 5.32, "grad_norm": 0.5429533123970032, "learning_rate": 4.076898249011539e-05, "loss": 0.0168, "step": 7330 }, { "epoch": 5.33, "grad_norm": 0.10347855091094971, "learning_rate": 4.074880981199064e-05, "loss": 0.0011, "step": 7340 }, { "epoch": 5.34, "grad_norm": 1.6859651803970337, "learning_rate": 4.0728637133865896e-05, "loss": 0.0088, "step": 7350 }, { "epoch": 5.34, "grad_norm": 0.5965036749839783, "learning_rate": 4.0708464455741144e-05, "loss": 0.0325, "step": 7360 }, { "epoch": 5.35, "grad_norm": 0.03945688530802727, "learning_rate": 4.06882917776164e-05, "loss": 0.0335, "step": 7370 }, { "epoch": 5.36, "grad_norm": 0.18698054552078247, "learning_rate": 4.066811909949165e-05, "loss": 0.0127, "step": 7380 }, { "epoch": 5.36, "grad_norm": 0.028903458267450333, "learning_rate": 4.06479464213669e-05, "loss": 0.0129, "step": 7390 }, { "epoch": 5.37, "grad_norm": 12.742640495300293, "learning_rate": 4.0627773743242155e-05, "loss": 0.0209, "step": 7400 }, { "epoch": 5.38, "grad_norm": 0.21819807589054108, "learning_rate": 4.060760106511741e-05, "loss": 0.0198, "step": 7410 }, { "epoch": 5.39, "grad_norm": 0.0063743069767951965, "learning_rate": 4.058742838699266e-05, "loss": 0.0225, "step": 7420 }, { "epoch": 5.39, "grad_norm": 6.776240825653076, "learning_rate": 4.056725570886791e-05, "loss": 0.0232, "step": 7430 }, { "epoch": 5.4, "grad_norm": 7.465283393859863, "learning_rate": 4.0547083030743166e-05, "loss": 0.0411, "step": 7440 }, { "epoch": 5.41, "grad_norm": 0.13503609597682953, "learning_rate": 4.0526910352618414e-05, "loss": 0.0198, "step": 7450 }, { "epoch": 5.42, "grad_norm": 5.632655620574951, "learning_rate": 4.050673767449367e-05, "loss": 0.0254, "step": 7460 }, { "epoch": 5.42, "grad_norm": 1.882933497428894, "learning_rate": 4.048656499636892e-05, "loss": 0.0129, "step": 7470 }, { "epoch": 5.43, "grad_norm": 2.3281795978546143, "learning_rate": 4.046639231824417e-05, "loss": 0.0061, "step": 7480 }, { "epoch": 5.44, "grad_norm": 0.12824377417564392, "learning_rate": 4.0446219640119425e-05, "loss": 0.0061, "step": 7490 }, { "epoch": 5.44, "grad_norm": 1.7312507629394531, "learning_rate": 4.042604696199468e-05, "loss": 0.0384, "step": 7500 }, { "epoch": 5.45, "grad_norm": 28.485464096069336, "learning_rate": 4.040587428386993e-05, "loss": 0.062, "step": 7510 }, { "epoch": 5.46, "grad_norm": 0.2245478332042694, "learning_rate": 4.038570160574518e-05, "loss": 0.0154, "step": 7520 }, { "epoch": 5.47, "grad_norm": 0.02461441047489643, "learning_rate": 4.0365528927620436e-05, "loss": 0.0162, "step": 7530 }, { "epoch": 5.47, "grad_norm": 0.0920153334736824, "learning_rate": 4.0345356249495684e-05, "loss": 0.0502, "step": 7540 }, { "epoch": 5.48, "grad_norm": 3.1807138919830322, "learning_rate": 4.032518357137094e-05, "loss": 0.0179, "step": 7550 }, { "epoch": 5.49, "grad_norm": 1.7319307327270508, "learning_rate": 4.0305010893246186e-05, "loss": 0.0105, "step": 7560 }, { "epoch": 5.5, "grad_norm": 5.294769287109375, "learning_rate": 4.028483821512144e-05, "loss": 0.0019, "step": 7570 }, { "epoch": 5.5, "grad_norm": 0.0027535264380276203, "learning_rate": 4.0264665536996695e-05, "loss": 0.0041, "step": 7580 }, { "epoch": 5.51, "grad_norm": 40.27950668334961, "learning_rate": 4.024449285887195e-05, "loss": 0.0186, "step": 7590 }, { "epoch": 5.52, "grad_norm": 0.08031009882688522, "learning_rate": 4.02243201807472e-05, "loss": 0.0153, "step": 7600 }, { "epoch": 5.52, "grad_norm": 0.31836315989494324, "learning_rate": 4.020414750262245e-05, "loss": 0.0159, "step": 7610 }, { "epoch": 5.53, "grad_norm": 0.3263595700263977, "learning_rate": 4.01839748244977e-05, "loss": 0.0286, "step": 7620 }, { "epoch": 5.54, "grad_norm": 29.408348083496094, "learning_rate": 4.0163802146372954e-05, "loss": 0.0328, "step": 7630 }, { "epoch": 5.55, "grad_norm": 0.6650347113609314, "learning_rate": 4.014362946824821e-05, "loss": 0.0076, "step": 7640 }, { "epoch": 5.55, "grad_norm": 0.03056545928120613, "learning_rate": 4.012345679012346e-05, "loss": 0.0193, "step": 7650 }, { "epoch": 5.56, "grad_norm": 21.62334442138672, "learning_rate": 4.010328411199871e-05, "loss": 0.0288, "step": 7660 }, { "epoch": 5.57, "grad_norm": 0.035483092069625854, "learning_rate": 4.0083111433873965e-05, "loss": 0.0227, "step": 7670 }, { "epoch": 5.58, "grad_norm": 0.013101032935082912, "learning_rate": 4.006293875574921e-05, "loss": 0.0536, "step": 7680 }, { "epoch": 5.58, "grad_norm": 0.007710463833063841, "learning_rate": 4.004276607762447e-05, "loss": 0.022, "step": 7690 }, { "epoch": 5.59, "grad_norm": 0.1644335836172104, "learning_rate": 4.002259339949972e-05, "loss": 0.0215, "step": 7700 }, { "epoch": 5.6, "grad_norm": 6.9764404296875, "learning_rate": 4.0002420721374976e-05, "loss": 0.0314, "step": 7710 }, { "epoch": 5.6, "grad_norm": 0.5656896233558655, "learning_rate": 3.9982248043250223e-05, "loss": 0.0501, "step": 7720 }, { "epoch": 5.61, "grad_norm": 0.26260992884635925, "learning_rate": 3.996207536512548e-05, "loss": 0.0281, "step": 7730 }, { "epoch": 5.62, "grad_norm": 0.4968787729740143, "learning_rate": 3.9941902687000726e-05, "loss": 0.016, "step": 7740 }, { "epoch": 5.63, "grad_norm": 10.740684509277344, "learning_rate": 3.992173000887598e-05, "loss": 0.0176, "step": 7750 }, { "epoch": 5.63, "grad_norm": 1.0777477025985718, "learning_rate": 3.9901557330751234e-05, "loss": 0.011, "step": 7760 }, { "epoch": 5.64, "grad_norm": 0.002109379041939974, "learning_rate": 3.988138465262649e-05, "loss": 0.0094, "step": 7770 }, { "epoch": 5.65, "grad_norm": 1.016344428062439, "learning_rate": 3.986121197450174e-05, "loss": 0.0249, "step": 7780 }, { "epoch": 5.66, "grad_norm": 22.406539916992188, "learning_rate": 3.9841039296376984e-05, "loss": 0.0149, "step": 7790 }, { "epoch": 5.66, "grad_norm": 1.52140474319458, "learning_rate": 3.982086661825224e-05, "loss": 0.021, "step": 7800 }, { "epoch": 5.67, "grad_norm": 0.23657099902629852, "learning_rate": 3.980069394012749e-05, "loss": 0.0368, "step": 7810 }, { "epoch": 5.68, "grad_norm": 0.6905073523521423, "learning_rate": 3.978052126200275e-05, "loss": 0.0199, "step": 7820 }, { "epoch": 5.68, "grad_norm": 1.6881111860275269, "learning_rate": 3.9760348583877995e-05, "loss": 0.023, "step": 7830 }, { "epoch": 5.69, "grad_norm": 0.24087966978549957, "learning_rate": 3.974017590575325e-05, "loss": 0.0352, "step": 7840 }, { "epoch": 5.7, "grad_norm": 0.4764993488788605, "learning_rate": 3.97200032276285e-05, "loss": 0.0434, "step": 7850 }, { "epoch": 5.71, "grad_norm": 2.340041399002075, "learning_rate": 3.969983054950375e-05, "loss": 0.0265, "step": 7860 }, { "epoch": 5.71, "grad_norm": 11.737015724182129, "learning_rate": 3.9679657871379007e-05, "loss": 0.0176, "step": 7870 }, { "epoch": 5.72, "grad_norm": 21.019853591918945, "learning_rate": 3.965948519325426e-05, "loss": 0.0234, "step": 7880 }, { "epoch": 5.73, "grad_norm": 0.0860639363527298, "learning_rate": 3.963931251512951e-05, "loss": 0.0107, "step": 7890 }, { "epoch": 5.74, "grad_norm": 0.27008432149887085, "learning_rate": 3.961913983700476e-05, "loss": 0.0222, "step": 7900 }, { "epoch": 5.74, "grad_norm": 0.33239853382110596, "learning_rate": 3.959896715888001e-05, "loss": 0.0101, "step": 7910 }, { "epoch": 5.75, "grad_norm": 8.389657974243164, "learning_rate": 3.9578794480755265e-05, "loss": 0.0255, "step": 7920 }, { "epoch": 5.76, "grad_norm": 0.008096696808934212, "learning_rate": 3.955862180263052e-05, "loss": 0.0299, "step": 7930 }, { "epoch": 5.76, "grad_norm": 0.0216965414583683, "learning_rate": 3.9538449124505774e-05, "loss": 0.0181, "step": 7940 }, { "epoch": 5.77, "grad_norm": 5.718286991119385, "learning_rate": 3.951827644638102e-05, "loss": 0.0114, "step": 7950 }, { "epoch": 5.78, "grad_norm": 0.2025628387928009, "learning_rate": 3.9498103768256276e-05, "loss": 0.0253, "step": 7960 }, { "epoch": 5.79, "grad_norm": 8.420141220092773, "learning_rate": 3.9477931090131524e-05, "loss": 0.0211, "step": 7970 }, { "epoch": 5.79, "grad_norm": 0.15034617483615875, "learning_rate": 3.945775841200678e-05, "loss": 0.0089, "step": 7980 }, { "epoch": 5.8, "grad_norm": 17.66687774658203, "learning_rate": 3.943758573388203e-05, "loss": 0.0302, "step": 7990 }, { "epoch": 5.81, "grad_norm": 1.483130693435669, "learning_rate": 3.941741305575729e-05, "loss": 0.0335, "step": 8000 }, { "epoch": 5.81, "grad_norm": 5.107597351074219, "learning_rate": 3.9397240377632535e-05, "loss": 0.0318, "step": 8010 }, { "epoch": 5.82, "grad_norm": 5.176906585693359, "learning_rate": 3.937706769950778e-05, "loss": 0.0152, "step": 8020 }, { "epoch": 5.83, "grad_norm": 0.008498461917042732, "learning_rate": 3.935689502138304e-05, "loss": 0.0219, "step": 8030 }, { "epoch": 5.84, "grad_norm": 0.003639570204541087, "learning_rate": 3.933672234325829e-05, "loss": 0.0078, "step": 8040 }, { "epoch": 5.84, "grad_norm": 0.7268972992897034, "learning_rate": 3.9316549665133546e-05, "loss": 0.0246, "step": 8050 }, { "epoch": 5.85, "grad_norm": 0.3043120205402374, "learning_rate": 3.92963769870088e-05, "loss": 0.03, "step": 8060 }, { "epoch": 5.86, "grad_norm": 0.22950021922588348, "learning_rate": 3.927620430888405e-05, "loss": 0.0191, "step": 8070 }, { "epoch": 5.87, "grad_norm": 19.542064666748047, "learning_rate": 3.9256031630759296e-05, "loss": 0.0147, "step": 8080 }, { "epoch": 5.87, "grad_norm": 0.10597487539052963, "learning_rate": 3.923585895263455e-05, "loss": 0.015, "step": 8090 }, { "epoch": 5.88, "grad_norm": 29.48893928527832, "learning_rate": 3.9215686274509805e-05, "loss": 0.0145, "step": 8100 }, { "epoch": 5.89, "grad_norm": 0.14151059091091156, "learning_rate": 3.919551359638506e-05, "loss": 0.0663, "step": 8110 }, { "epoch": 5.89, "grad_norm": 0.3282245695590973, "learning_rate": 3.9175340918260314e-05, "loss": 0.0439, "step": 8120 }, { "epoch": 5.9, "grad_norm": 6.798530101776123, "learning_rate": 3.915516824013556e-05, "loss": 0.0324, "step": 8130 }, { "epoch": 5.91, "grad_norm": 0.0029835246969014406, "learning_rate": 3.913499556201081e-05, "loss": 0.0056, "step": 8140 }, { "epoch": 5.92, "grad_norm": 0.050998538732528687, "learning_rate": 3.9114822883886064e-05, "loss": 0.0114, "step": 8150 }, { "epoch": 5.92, "grad_norm": 0.00737336790189147, "learning_rate": 3.909465020576132e-05, "loss": 0.0044, "step": 8160 }, { "epoch": 5.93, "grad_norm": 1.9938186407089233, "learning_rate": 3.907447752763657e-05, "loss": 0.0131, "step": 8170 }, { "epoch": 5.94, "grad_norm": 2.643894672393799, "learning_rate": 3.905430484951183e-05, "loss": 0.0151, "step": 8180 }, { "epoch": 5.95, "grad_norm": 0.9170699715614319, "learning_rate": 3.9034132171387075e-05, "loss": 0.0086, "step": 8190 }, { "epoch": 5.95, "grad_norm": 0.027857156470417976, "learning_rate": 3.901395949326232e-05, "loss": 0.013, "step": 8200 }, { "epoch": 5.96, "grad_norm": 0.005472167860716581, "learning_rate": 3.899378681513758e-05, "loss": 0.0317, "step": 8210 }, { "epoch": 5.97, "grad_norm": 26.97398567199707, "learning_rate": 3.897361413701283e-05, "loss": 0.0407, "step": 8220 }, { "epoch": 5.97, "grad_norm": 5.671159744262695, "learning_rate": 3.8953441458888086e-05, "loss": 0.0463, "step": 8230 }, { "epoch": 5.98, "grad_norm": 0.005169416777789593, "learning_rate": 3.893326878076334e-05, "loss": 0.0087, "step": 8240 }, { "epoch": 5.99, "grad_norm": 0.06089169904589653, "learning_rate": 3.891309610263859e-05, "loss": 0.0096, "step": 8250 }, { "epoch": 6.0, "grad_norm": 0.1659688800573349, "learning_rate": 3.8892923424513836e-05, "loss": 0.0073, "step": 8260 }, { "epoch": 6.0, "eval_accuracy": 0.9975609202704543, "eval_f1": 0.9943809946946136, "eval_loss": 0.006377417594194412, "eval_precision": 0.9956560422881666, "eval_recall": 0.9931092086030486, "eval_roc_auc": 0.9999651368605089, "eval_runtime": 386.667, "eval_samples_per_second": 227.969, "eval_steps_per_second": 14.25, "step": 8265 }, { "epoch": 6.0, "grad_norm": 0.3143859803676605, "learning_rate": 3.887275074638909e-05, "loss": 0.0049, "step": 8270 }, { "epoch": 6.01, "grad_norm": 0.06847266852855682, "learning_rate": 3.8852578068264345e-05, "loss": 0.0059, "step": 8280 }, { "epoch": 6.02, "grad_norm": 23.29844093322754, "learning_rate": 3.88324053901396e-05, "loss": 0.0419, "step": 8290 }, { "epoch": 6.03, "grad_norm": 0.010631518438458443, "learning_rate": 3.8812232712014854e-05, "loss": 0.0267, "step": 8300 }, { "epoch": 6.03, "grad_norm": 0.05696843937039375, "learning_rate": 3.87920600338901e-05, "loss": 0.0061, "step": 8310 }, { "epoch": 6.04, "grad_norm": 0.07849406450986862, "learning_rate": 3.877188735576535e-05, "loss": 0.0147, "step": 8320 }, { "epoch": 6.05, "grad_norm": 0.006846526637673378, "learning_rate": 3.8751714677640603e-05, "loss": 0.0081, "step": 8330 }, { "epoch": 6.05, "grad_norm": 2.307755708694458, "learning_rate": 3.873154199951586e-05, "loss": 0.0197, "step": 8340 }, { "epoch": 6.06, "grad_norm": 16.542884826660156, "learning_rate": 3.871136932139111e-05, "loss": 0.0132, "step": 8350 }, { "epoch": 6.07, "grad_norm": 0.5160506367683411, "learning_rate": 3.869119664326637e-05, "loss": 0.0022, "step": 8360 }, { "epoch": 6.08, "grad_norm": 0.0947955921292305, "learning_rate": 3.8671023965141615e-05, "loss": 0.0067, "step": 8370 }, { "epoch": 6.08, "grad_norm": 2.2483770847320557, "learning_rate": 3.865085128701686e-05, "loss": 0.0141, "step": 8380 }, { "epoch": 6.09, "grad_norm": 2.6705210208892822, "learning_rate": 3.863067860889212e-05, "loss": 0.0032, "step": 8390 }, { "epoch": 6.1, "grad_norm": 16.716379165649414, "learning_rate": 3.861050593076737e-05, "loss": 0.0193, "step": 8400 }, { "epoch": 6.11, "grad_norm": 5.897037982940674, "learning_rate": 3.8590333252642626e-05, "loss": 0.0197, "step": 8410 }, { "epoch": 6.11, "grad_norm": 3.2264394760131836, "learning_rate": 3.857016057451788e-05, "loss": 0.0099, "step": 8420 }, { "epoch": 6.12, "grad_norm": 3.9197843074798584, "learning_rate": 3.854998789639313e-05, "loss": 0.0288, "step": 8430 }, { "epoch": 6.13, "grad_norm": 0.2298147827386856, "learning_rate": 3.8529815218268376e-05, "loss": 0.0182, "step": 8440 }, { "epoch": 6.13, "grad_norm": 7.973130702972412, "learning_rate": 3.850964254014363e-05, "loss": 0.021, "step": 8450 }, { "epoch": 6.14, "grad_norm": 0.713499903678894, "learning_rate": 3.8489469862018884e-05, "loss": 0.0219, "step": 8460 }, { "epoch": 6.15, "grad_norm": 0.004311359953135252, "learning_rate": 3.846929718389414e-05, "loss": 0.0205, "step": 8470 }, { "epoch": 6.16, "grad_norm": 0.14316757023334503, "learning_rate": 3.844912450576939e-05, "loss": 0.0262, "step": 8480 }, { "epoch": 6.16, "grad_norm": 0.22620250284671783, "learning_rate": 3.842895182764464e-05, "loss": 0.0109, "step": 8490 }, { "epoch": 6.17, "grad_norm": 20.04834747314453, "learning_rate": 3.840877914951989e-05, "loss": 0.0218, "step": 8500 }, { "epoch": 6.18, "grad_norm": 0.3280732035636902, "learning_rate": 3.838860647139514e-05, "loss": 0.0102, "step": 8510 }, { "epoch": 6.19, "grad_norm": 0.06915584206581116, "learning_rate": 3.83684337932704e-05, "loss": 0.0247, "step": 8520 }, { "epoch": 6.19, "grad_norm": 0.0052139488980174065, "learning_rate": 3.834826111514565e-05, "loss": 0.016, "step": 8530 }, { "epoch": 6.2, "grad_norm": 1.4115946292877197, "learning_rate": 3.83280884370209e-05, "loss": 0.0067, "step": 8540 }, { "epoch": 6.21, "grad_norm": 0.0992535725235939, "learning_rate": 3.8307915758896154e-05, "loss": 0.0163, "step": 8550 }, { "epoch": 6.21, "grad_norm": 0.05193591117858887, "learning_rate": 3.82877430807714e-05, "loss": 0.027, "step": 8560 }, { "epoch": 6.22, "grad_norm": 0.004640494007617235, "learning_rate": 3.8267570402646656e-05, "loss": 0.0183, "step": 8570 }, { "epoch": 6.23, "grad_norm": 2.3029911518096924, "learning_rate": 3.824739772452191e-05, "loss": 0.0091, "step": 8580 }, { "epoch": 6.24, "grad_norm": 0.002811912214383483, "learning_rate": 3.8227225046397165e-05, "loss": 0.0265, "step": 8590 }, { "epoch": 6.24, "grad_norm": 0.10390030592679977, "learning_rate": 3.820705236827241e-05, "loss": 0.0186, "step": 8600 }, { "epoch": 6.25, "grad_norm": 3.3613178730010986, "learning_rate": 3.818687969014767e-05, "loss": 0.0144, "step": 8610 }, { "epoch": 6.26, "grad_norm": 0.3605481684207916, "learning_rate": 3.8166707012022915e-05, "loss": 0.0421, "step": 8620 }, { "epoch": 6.26, "grad_norm": 0.12366246432065964, "learning_rate": 3.814653433389817e-05, "loss": 0.0105, "step": 8630 }, { "epoch": 6.27, "grad_norm": 3.998110294342041, "learning_rate": 3.8126361655773424e-05, "loss": 0.0449, "step": 8640 }, { "epoch": 6.28, "grad_norm": 3.1826484203338623, "learning_rate": 3.810618897764868e-05, "loss": 0.0302, "step": 8650 }, { "epoch": 6.29, "grad_norm": 0.11859409511089325, "learning_rate": 3.8086016299523926e-05, "loss": 0.013, "step": 8660 }, { "epoch": 6.29, "grad_norm": 1.415103793144226, "learning_rate": 3.806584362139918e-05, "loss": 0.0273, "step": 8670 }, { "epoch": 6.3, "grad_norm": 0.10443487763404846, "learning_rate": 3.804567094327443e-05, "loss": 0.0236, "step": 8680 }, { "epoch": 6.31, "grad_norm": 3.090874671936035, "learning_rate": 3.802549826514968e-05, "loss": 0.0326, "step": 8690 }, { "epoch": 6.32, "grad_norm": 0.018442168831825256, "learning_rate": 3.800532558702494e-05, "loss": 0.0238, "step": 8700 }, { "epoch": 6.32, "grad_norm": 0.057262253016233444, "learning_rate": 3.798515290890019e-05, "loss": 0.0048, "step": 8710 }, { "epoch": 6.33, "grad_norm": 0.06799819320440292, "learning_rate": 3.796498023077544e-05, "loss": 0.0098, "step": 8720 }, { "epoch": 6.34, "grad_norm": 0.0023398185148835182, "learning_rate": 3.7944807552650694e-05, "loss": 0.0153, "step": 8730 }, { "epoch": 6.34, "grad_norm": 0.2396043837070465, "learning_rate": 3.792463487452594e-05, "loss": 0.0063, "step": 8740 }, { "epoch": 6.35, "grad_norm": 18.22637176513672, "learning_rate": 3.7904462196401196e-05, "loss": 0.0098, "step": 8750 }, { "epoch": 6.36, "grad_norm": 23.354751586914062, "learning_rate": 3.788428951827645e-05, "loss": 0.0068, "step": 8760 }, { "epoch": 6.37, "grad_norm": 0.0036677473690360785, "learning_rate": 3.78641168401517e-05, "loss": 0.0038, "step": 8770 }, { "epoch": 6.37, "grad_norm": 0.006179885007441044, "learning_rate": 3.784394416202695e-05, "loss": 0.0075, "step": 8780 }, { "epoch": 6.38, "grad_norm": 0.1622333824634552, "learning_rate": 3.782377148390221e-05, "loss": 0.0057, "step": 8790 }, { "epoch": 6.39, "grad_norm": 0.019087301567196846, "learning_rate": 3.7803598805777455e-05, "loss": 0.0084, "step": 8800 }, { "epoch": 6.4, "grad_norm": 0.09246546030044556, "learning_rate": 3.778342612765271e-05, "loss": 0.0086, "step": 8810 }, { "epoch": 6.4, "grad_norm": 1.2068523168563843, "learning_rate": 3.7763253449527964e-05, "loss": 0.0308, "step": 8820 }, { "epoch": 6.41, "grad_norm": 11.821285247802734, "learning_rate": 3.774308077140321e-05, "loss": 0.0085, "step": 8830 }, { "epoch": 6.42, "grad_norm": 8.977716445922852, "learning_rate": 3.7722908093278466e-05, "loss": 0.0068, "step": 8840 }, { "epoch": 6.42, "grad_norm": 0.0026208017952740192, "learning_rate": 3.7702735415153714e-05, "loss": 0.0109, "step": 8850 }, { "epoch": 6.43, "grad_norm": 0.00370892439968884, "learning_rate": 3.768256273702897e-05, "loss": 0.0169, "step": 8860 }, { "epoch": 6.44, "grad_norm": 0.37159231305122375, "learning_rate": 3.766239005890422e-05, "loss": 0.005, "step": 8870 }, { "epoch": 6.45, "grad_norm": 0.39433690905570984, "learning_rate": 3.764221738077948e-05, "loss": 0.0094, "step": 8880 }, { "epoch": 6.45, "grad_norm": 0.17372171580791473, "learning_rate": 3.7622044702654725e-05, "loss": 0.018, "step": 8890 }, { "epoch": 6.46, "grad_norm": 0.02303539402782917, "learning_rate": 3.760187202452998e-05, "loss": 0.015, "step": 8900 }, { "epoch": 6.47, "grad_norm": 29.997024536132812, "learning_rate": 3.758169934640523e-05, "loss": 0.023, "step": 8910 }, { "epoch": 6.48, "grad_norm": 28.65287971496582, "learning_rate": 3.756152666828048e-05, "loss": 0.0163, "step": 8920 }, { "epoch": 6.48, "grad_norm": 0.004773481283336878, "learning_rate": 3.7541353990155736e-05, "loss": 0.007, "step": 8930 }, { "epoch": 6.49, "grad_norm": 19.97212028503418, "learning_rate": 3.752118131203099e-05, "loss": 0.0238, "step": 8940 }, { "epoch": 6.5, "grad_norm": 0.3827419877052307, "learning_rate": 3.750100863390624e-05, "loss": 0.0245, "step": 8950 }, { "epoch": 6.5, "grad_norm": 0.15338027477264404, "learning_rate": 3.748083595578149e-05, "loss": 0.0162, "step": 8960 }, { "epoch": 6.51, "grad_norm": 16.326635360717773, "learning_rate": 3.746066327765674e-05, "loss": 0.0166, "step": 8970 }, { "epoch": 6.52, "grad_norm": 0.9681591987609863, "learning_rate": 3.7440490599531995e-05, "loss": 0.0065, "step": 8980 }, { "epoch": 6.53, "grad_norm": 18.14190673828125, "learning_rate": 3.742031792140725e-05, "loss": 0.0255, "step": 8990 }, { "epoch": 6.53, "grad_norm": 5.85013484954834, "learning_rate": 3.74001452432825e-05, "loss": 0.0071, "step": 9000 }, { "epoch": 6.54, "grad_norm": 0.15171761810779572, "learning_rate": 3.737997256515775e-05, "loss": 0.0121, "step": 9010 }, { "epoch": 6.55, "grad_norm": 0.031181402504444122, "learning_rate": 3.7359799887033006e-05, "loss": 0.0126, "step": 9020 }, { "epoch": 6.56, "grad_norm": 0.003250251989811659, "learning_rate": 3.7339627208908253e-05, "loss": 0.0045, "step": 9030 }, { "epoch": 6.56, "grad_norm": 8.333500862121582, "learning_rate": 3.731945453078351e-05, "loss": 0.0225, "step": 9040 }, { "epoch": 6.57, "grad_norm": 0.039202239364385605, "learning_rate": 3.729928185265876e-05, "loss": 0.0132, "step": 9050 }, { "epoch": 6.58, "grad_norm": 11.631218910217285, "learning_rate": 3.727910917453401e-05, "loss": 0.0262, "step": 9060 }, { "epoch": 6.58, "grad_norm": 6.861734390258789, "learning_rate": 3.7258936496409265e-05, "loss": 0.0419, "step": 9070 }, { "epoch": 6.59, "grad_norm": 0.08070015907287598, "learning_rate": 3.723876381828452e-05, "loss": 0.0584, "step": 9080 }, { "epoch": 6.6, "grad_norm": 19.176902770996094, "learning_rate": 3.721859114015977e-05, "loss": 0.0271, "step": 9090 }, { "epoch": 6.61, "grad_norm": 0.048521075397729874, "learning_rate": 3.719841846203502e-05, "loss": 0.0117, "step": 9100 }, { "epoch": 6.61, "grad_norm": 0.39843207597732544, "learning_rate": 3.7178245783910276e-05, "loss": 0.0072, "step": 9110 }, { "epoch": 6.62, "grad_norm": 0.0638517439365387, "learning_rate": 3.715807310578552e-05, "loss": 0.0088, "step": 9120 }, { "epoch": 6.63, "grad_norm": 0.6130620241165161, "learning_rate": 3.713790042766078e-05, "loss": 0.0039, "step": 9130 }, { "epoch": 6.64, "grad_norm": 1.8792153596878052, "learning_rate": 3.711772774953603e-05, "loss": 0.0136, "step": 9140 }, { "epoch": 6.64, "grad_norm": 3.8683571815490723, "learning_rate": 3.709755507141128e-05, "loss": 0.0241, "step": 9150 }, { "epoch": 6.65, "grad_norm": 1.621826171875, "learning_rate": 3.7077382393286534e-05, "loss": 0.0059, "step": 9160 }, { "epoch": 6.66, "grad_norm": 0.018028290942311287, "learning_rate": 3.705720971516179e-05, "loss": 0.0139, "step": 9170 }, { "epoch": 6.66, "grad_norm": 14.395564079284668, "learning_rate": 3.7037037037037037e-05, "loss": 0.0075, "step": 9180 }, { "epoch": 6.67, "grad_norm": 0.013322776183485985, "learning_rate": 3.701686435891229e-05, "loss": 0.0184, "step": 9190 }, { "epoch": 6.68, "grad_norm": 6.382280349731445, "learning_rate": 3.6996691680787545e-05, "loss": 0.0219, "step": 9200 }, { "epoch": 6.69, "grad_norm": 9.223612785339355, "learning_rate": 3.697651900266279e-05, "loss": 0.019, "step": 9210 }, { "epoch": 6.69, "grad_norm": 0.005767362657934427, "learning_rate": 3.695634632453805e-05, "loss": 0.0104, "step": 9220 }, { "epoch": 6.7, "grad_norm": 0.18802551925182343, "learning_rate": 3.69361736464133e-05, "loss": 0.0096, "step": 9230 }, { "epoch": 6.71, "grad_norm": 0.18698835372924805, "learning_rate": 3.691600096828855e-05, "loss": 0.0207, "step": 9240 }, { "epoch": 6.72, "grad_norm": 0.004097466357052326, "learning_rate": 3.6895828290163804e-05, "loss": 0.0165, "step": 9250 }, { "epoch": 6.72, "grad_norm": 0.17292384803295135, "learning_rate": 3.687565561203906e-05, "loss": 0.0306, "step": 9260 }, { "epoch": 6.73, "grad_norm": 0.30295267701148987, "learning_rate": 3.6855482933914306e-05, "loss": 0.01, "step": 9270 }, { "epoch": 6.74, "grad_norm": 0.21782360970973969, "learning_rate": 3.683531025578956e-05, "loss": 0.0434, "step": 9280 }, { "epoch": 6.74, "grad_norm": 14.22446346282959, "learning_rate": 3.681513757766481e-05, "loss": 0.0338, "step": 9290 }, { "epoch": 6.75, "grad_norm": 0.8099685311317444, "learning_rate": 3.679496489954006e-05, "loss": 0.0099, "step": 9300 }, { "epoch": 6.76, "grad_norm": 6.104836940765381, "learning_rate": 3.677479222141532e-05, "loss": 0.011, "step": 9310 }, { "epoch": 6.77, "grad_norm": 0.00373910553753376, "learning_rate": 3.675461954329057e-05, "loss": 0.0215, "step": 9320 }, { "epoch": 6.77, "grad_norm": 0.22883032262325287, "learning_rate": 3.673444686516582e-05, "loss": 0.0045, "step": 9330 }, { "epoch": 6.78, "grad_norm": 3.783512592315674, "learning_rate": 3.6714274187041074e-05, "loss": 0.0276, "step": 9340 }, { "epoch": 6.79, "grad_norm": 0.17120474576950073, "learning_rate": 3.669410150891632e-05, "loss": 0.022, "step": 9350 }, { "epoch": 6.79, "grad_norm": 0.04094693809747696, "learning_rate": 3.6673928830791576e-05, "loss": 0.0047, "step": 9360 }, { "epoch": 6.8, "grad_norm": 0.7232370376586914, "learning_rate": 3.665375615266683e-05, "loss": 0.0258, "step": 9370 }, { "epoch": 6.81, "grad_norm": 16.711807250976562, "learning_rate": 3.6633583474542085e-05, "loss": 0.0124, "step": 9380 }, { "epoch": 6.82, "grad_norm": 31.93790626525879, "learning_rate": 3.661341079641733e-05, "loss": 0.006, "step": 9390 }, { "epoch": 6.82, "grad_norm": 6.95900297164917, "learning_rate": 3.659323811829259e-05, "loss": 0.0265, "step": 9400 }, { "epoch": 6.83, "grad_norm": 5.2735161781311035, "learning_rate": 3.6573065440167835e-05, "loss": 0.0323, "step": 9410 }, { "epoch": 6.84, "grad_norm": 0.699161171913147, "learning_rate": 3.655289276204309e-05, "loss": 0.0164, "step": 9420 }, { "epoch": 6.85, "grad_norm": 16.414228439331055, "learning_rate": 3.6532720083918344e-05, "loss": 0.0145, "step": 9430 }, { "epoch": 6.85, "grad_norm": 0.07426783442497253, "learning_rate": 3.65125474057936e-05, "loss": 0.0048, "step": 9440 }, { "epoch": 6.86, "grad_norm": 0.013512199744582176, "learning_rate": 3.6492374727668846e-05, "loss": 0.0065, "step": 9450 }, { "epoch": 6.87, "grad_norm": 0.6285327672958374, "learning_rate": 3.64722020495441e-05, "loss": 0.0416, "step": 9460 }, { "epoch": 6.87, "grad_norm": 5.466975688934326, "learning_rate": 3.645202937141935e-05, "loss": 0.0109, "step": 9470 }, { "epoch": 6.88, "grad_norm": 0.16771970689296722, "learning_rate": 3.64318566932946e-05, "loss": 0.0293, "step": 9480 }, { "epoch": 6.89, "grad_norm": 12.002236366271973, "learning_rate": 3.641168401516986e-05, "loss": 0.0189, "step": 9490 }, { "epoch": 6.9, "grad_norm": 0.3899655044078827, "learning_rate": 3.639151133704511e-05, "loss": 0.0376, "step": 9500 }, { "epoch": 6.9, "grad_norm": 7.12745475769043, "learning_rate": 3.637133865892036e-05, "loss": 0.0093, "step": 9510 }, { "epoch": 6.91, "grad_norm": 1.6025562286376953, "learning_rate": 3.635116598079561e-05, "loss": 0.0109, "step": 9520 }, { "epoch": 6.92, "grad_norm": 0.007232175208628178, "learning_rate": 3.633099330267086e-05, "loss": 0.0116, "step": 9530 }, { "epoch": 6.93, "grad_norm": 0.010008217766880989, "learning_rate": 3.6310820624546116e-05, "loss": 0.0203, "step": 9540 }, { "epoch": 6.93, "grad_norm": 0.10533101111650467, "learning_rate": 3.629064794642137e-05, "loss": 0.0299, "step": 9550 }, { "epoch": 6.94, "grad_norm": 0.09911059588193893, "learning_rate": 3.6270475268296625e-05, "loss": 0.0145, "step": 9560 }, { "epoch": 6.95, "grad_norm": 2.048704147338867, "learning_rate": 3.625030259017187e-05, "loss": 0.0076, "step": 9570 }, { "epoch": 6.95, "grad_norm": 0.05403584986925125, "learning_rate": 3.623012991204712e-05, "loss": 0.0086, "step": 9580 }, { "epoch": 6.96, "grad_norm": 0.12622416019439697, "learning_rate": 3.6209957233922375e-05, "loss": 0.0226, "step": 9590 }, { "epoch": 6.97, "grad_norm": 0.004092243034392595, "learning_rate": 3.618978455579763e-05, "loss": 0.0134, "step": 9600 }, { "epoch": 6.98, "grad_norm": 17.107080459594727, "learning_rate": 3.6169611877672884e-05, "loss": 0.0195, "step": 9610 }, { "epoch": 6.98, "grad_norm": 0.022195547819137573, "learning_rate": 3.614943919954814e-05, "loss": 0.0255, "step": 9620 }, { "epoch": 6.99, "grad_norm": 0.04998145252466202, "learning_rate": 3.6129266521423386e-05, "loss": 0.006, "step": 9630 }, { "epoch": 7.0, "grad_norm": 0.027100518345832825, "learning_rate": 3.6109093843298634e-05, "loss": 0.016, "step": 9640 }, { "epoch": 7.0, "eval_accuracy": 0.9974588192585198, "eval_f1": 0.9941486860665587, "eval_loss": 0.006671547889709473, "eval_precision": 0.994928369758444, "eval_recall": 0.9933702234286907, "eval_roc_auc": 0.9999722872260652, "eval_runtime": 387.013, "eval_samples_per_second": 227.765, "eval_steps_per_second": 14.237, "step": 9642 }, { "epoch": 7.01, "grad_norm": 0.15103967487812042, "learning_rate": 3.608892116517389e-05, "loss": 0.008, "step": 9650 }, { "epoch": 7.01, "grad_norm": 1.6163809299468994, "learning_rate": 3.606874848704914e-05, "loss": 0.006, "step": 9660 }, { "epoch": 7.02, "grad_norm": 0.4985108971595764, "learning_rate": 3.60485758089244e-05, "loss": 0.0162, "step": 9670 }, { "epoch": 7.03, "grad_norm": 0.056671515107154846, "learning_rate": 3.602840313079965e-05, "loss": 0.0039, "step": 9680 }, { "epoch": 7.03, "grad_norm": 12.355618476867676, "learning_rate": 3.60082304526749e-05, "loss": 0.0501, "step": 9690 }, { "epoch": 7.04, "grad_norm": 11.992454528808594, "learning_rate": 3.598805777455015e-05, "loss": 0.045, "step": 9700 }, { "epoch": 7.05, "grad_norm": 0.22388532757759094, "learning_rate": 3.59678850964254e-05, "loss": 0.0217, "step": 9710 }, { "epoch": 7.06, "grad_norm": 1.1855632066726685, "learning_rate": 3.5947712418300656e-05, "loss": 0.0188, "step": 9720 }, { "epoch": 7.06, "grad_norm": 0.43892866373062134, "learning_rate": 3.592753974017591e-05, "loss": 0.0112, "step": 9730 }, { "epoch": 7.07, "grad_norm": 15.013214111328125, "learning_rate": 3.5907367062051165e-05, "loss": 0.0222, "step": 9740 }, { "epoch": 7.08, "grad_norm": 1.0387877225875854, "learning_rate": 3.588719438392641e-05, "loss": 0.0233, "step": 9750 }, { "epoch": 7.09, "grad_norm": 0.41390901803970337, "learning_rate": 3.586702170580166e-05, "loss": 0.0138, "step": 9760 }, { "epoch": 7.09, "grad_norm": 0.1715419590473175, "learning_rate": 3.5846849027676914e-05, "loss": 0.0093, "step": 9770 }, { "epoch": 7.1, "grad_norm": 0.01624373160302639, "learning_rate": 3.582667634955217e-05, "loss": 0.0065, "step": 9780 }, { "epoch": 7.11, "grad_norm": 4.485403060913086, "learning_rate": 3.5806503671427423e-05, "loss": 0.0163, "step": 9790 }, { "epoch": 7.11, "grad_norm": 0.5213087797164917, "learning_rate": 3.578633099330268e-05, "loss": 0.0152, "step": 9800 }, { "epoch": 7.12, "grad_norm": 0.0917457863688469, "learning_rate": 3.5766158315177926e-05, "loss": 0.0043, "step": 9810 }, { "epoch": 7.13, "grad_norm": 0.13996107876300812, "learning_rate": 3.574598563705317e-05, "loss": 0.0056, "step": 9820 }, { "epoch": 7.14, "grad_norm": 0.2892821431159973, "learning_rate": 3.572581295892843e-05, "loss": 0.0249, "step": 9830 }, { "epoch": 7.14, "grad_norm": 0.10906893014907837, "learning_rate": 3.570564028080368e-05, "loss": 0.0026, "step": 9840 }, { "epoch": 7.15, "grad_norm": 0.19463422894477844, "learning_rate": 3.568546760267894e-05, "loss": 0.0109, "step": 9850 }, { "epoch": 7.16, "grad_norm": 0.043046820908784866, "learning_rate": 3.566529492455419e-05, "loss": 0.0045, "step": 9860 }, { "epoch": 7.17, "grad_norm": 0.0024864105507731438, "learning_rate": 3.564512224642944e-05, "loss": 0.0018, "step": 9870 }, { "epoch": 7.17, "grad_norm": 0.0023697796277701855, "learning_rate": 3.5624949568304687e-05, "loss": 0.0214, "step": 9880 }, { "epoch": 7.18, "grad_norm": 0.17014308273792267, "learning_rate": 3.560477689017994e-05, "loss": 0.0135, "step": 9890 }, { "epoch": 7.19, "grad_norm": 0.06295622885227203, "learning_rate": 3.5584604212055195e-05, "loss": 0.0222, "step": 9900 }, { "epoch": 7.19, "grad_norm": 0.06391363590955734, "learning_rate": 3.556443153393045e-05, "loss": 0.0054, "step": 9910 }, { "epoch": 7.2, "grad_norm": 0.01831732876598835, "learning_rate": 3.5544258855805704e-05, "loss": 0.0092, "step": 9920 }, { "epoch": 7.21, "grad_norm": 0.011373781599104404, "learning_rate": 3.5524086177680945e-05, "loss": 0.0033, "step": 9930 }, { "epoch": 7.22, "grad_norm": 0.021700827404856682, "learning_rate": 3.55039134995562e-05, "loss": 0.042, "step": 9940 }, { "epoch": 7.22, "grad_norm": 0.06618179380893707, "learning_rate": 3.5483740821431454e-05, "loss": 0.0129, "step": 9950 }, { "epoch": 7.23, "grad_norm": 5.286431789398193, "learning_rate": 3.546356814330671e-05, "loss": 0.0244, "step": 9960 }, { "epoch": 7.24, "grad_norm": 4.6873250007629395, "learning_rate": 3.544339546518196e-05, "loss": 0.0088, "step": 9970 }, { "epoch": 7.25, "grad_norm": 0.43198829889297485, "learning_rate": 3.542322278705721e-05, "loss": 0.0122, "step": 9980 }, { "epoch": 7.25, "grad_norm": 0.011568567715585232, "learning_rate": 3.540305010893246e-05, "loss": 0.0054, "step": 9990 }, { "epoch": 7.26, "grad_norm": 0.04392976313829422, "learning_rate": 3.538287743080771e-05, "loss": 0.0326, "step": 10000 }, { "epoch": 7.27, "grad_norm": 0.10357452183961868, "learning_rate": 3.536270475268297e-05, "loss": 0.0065, "step": 10010 }, { "epoch": 7.27, "grad_norm": 1.8322253227233887, "learning_rate": 3.534253207455822e-05, "loss": 0.0231, "step": 10020 }, { "epoch": 7.28, "grad_norm": 0.12756188213825226, "learning_rate": 3.5322359396433476e-05, "loss": 0.0055, "step": 10030 }, { "epoch": 7.29, "grad_norm": 0.015612252056598663, "learning_rate": 3.5302186718308724e-05, "loss": 0.0117, "step": 10040 }, { "epoch": 7.3, "grad_norm": 0.6343337297439575, "learning_rate": 3.528201404018397e-05, "loss": 0.0311, "step": 10050 }, { "epoch": 7.3, "grad_norm": 0.020835332572460175, "learning_rate": 3.5261841362059226e-05, "loss": 0.008, "step": 10060 }, { "epoch": 7.31, "grad_norm": 0.04921965301036835, "learning_rate": 3.524166868393448e-05, "loss": 0.0096, "step": 10070 }, { "epoch": 7.32, "grad_norm": 0.031414203345775604, "learning_rate": 3.5221496005809735e-05, "loss": 0.0185, "step": 10080 }, { "epoch": 7.32, "grad_norm": 0.04260152578353882, "learning_rate": 3.520132332768499e-05, "loss": 0.0082, "step": 10090 }, { "epoch": 7.33, "grad_norm": 0.0031811215449124575, "learning_rate": 3.518115064956024e-05, "loss": 0.0101, "step": 10100 }, { "epoch": 7.34, "grad_norm": 4.260556221008301, "learning_rate": 3.5160977971435485e-05, "loss": 0.0105, "step": 10110 }, { "epoch": 7.35, "grad_norm": 0.007074627093970776, "learning_rate": 3.514080529331074e-05, "loss": 0.0194, "step": 10120 }, { "epoch": 7.35, "grad_norm": 1.2988779544830322, "learning_rate": 3.5120632615185994e-05, "loss": 0.0295, "step": 10130 }, { "epoch": 7.36, "grad_norm": 0.19852252304553986, "learning_rate": 3.510045993706125e-05, "loss": 0.0142, "step": 10140 }, { "epoch": 7.37, "grad_norm": 0.9556043744087219, "learning_rate": 3.50802872589365e-05, "loss": 0.0343, "step": 10150 }, { "epoch": 7.38, "grad_norm": 7.8187150955200195, "learning_rate": 3.506011458081175e-05, "loss": 0.0195, "step": 10160 }, { "epoch": 7.38, "grad_norm": 0.00911070965230465, "learning_rate": 3.5039941902687e-05, "loss": 0.0091, "step": 10170 }, { "epoch": 7.39, "grad_norm": 0.4240989089012146, "learning_rate": 3.501976922456225e-05, "loss": 0.0248, "step": 10180 }, { "epoch": 7.4, "grad_norm": 0.3559081256389618, "learning_rate": 3.499959654643751e-05, "loss": 0.0246, "step": 10190 }, { "epoch": 7.4, "grad_norm": 0.09215513616800308, "learning_rate": 3.497942386831276e-05, "loss": 0.0184, "step": 10200 }, { "epoch": 7.41, "grad_norm": 1.6127543449401855, "learning_rate": 3.4959251190188016e-05, "loss": 0.027, "step": 10210 }, { "epoch": 7.42, "grad_norm": 13.152901649475098, "learning_rate": 3.4939078512063264e-05, "loss": 0.0265, "step": 10220 }, { "epoch": 7.43, "grad_norm": 0.5344623923301697, "learning_rate": 3.491890583393851e-05, "loss": 0.0159, "step": 10230 }, { "epoch": 7.43, "grad_norm": 8.818399429321289, "learning_rate": 3.4898733155813766e-05, "loss": 0.0157, "step": 10240 }, { "epoch": 7.44, "grad_norm": 1.7615970373153687, "learning_rate": 3.487856047768902e-05, "loss": 0.0187, "step": 10250 }, { "epoch": 7.45, "grad_norm": 3.8972291946411133, "learning_rate": 3.4858387799564275e-05, "loss": 0.0107, "step": 10260 }, { "epoch": 7.46, "grad_norm": 10.226646423339844, "learning_rate": 3.483821512143952e-05, "loss": 0.0086, "step": 10270 }, { "epoch": 7.46, "grad_norm": 0.04844396561384201, "learning_rate": 3.481804244331478e-05, "loss": 0.0049, "step": 10280 }, { "epoch": 7.47, "grad_norm": 1.5469908714294434, "learning_rate": 3.4797869765190025e-05, "loss": 0.0088, "step": 10290 }, { "epoch": 7.48, "grad_norm": 0.038982976227998734, "learning_rate": 3.477769708706528e-05, "loss": 0.01, "step": 10300 }, { "epoch": 7.48, "grad_norm": 0.0024217732716351748, "learning_rate": 3.4757524408940534e-05, "loss": 0.0012, "step": 10310 }, { "epoch": 7.49, "grad_norm": 1.0054019689559937, "learning_rate": 3.473735173081579e-05, "loss": 0.0095, "step": 10320 }, { "epoch": 7.5, "grad_norm": 22.222614288330078, "learning_rate": 3.4717179052691036e-05, "loss": 0.012, "step": 10330 }, { "epoch": 7.51, "grad_norm": 2.256174087524414, "learning_rate": 3.469700637456629e-05, "loss": 0.014, "step": 10340 }, { "epoch": 7.51, "grad_norm": 0.5855737924575806, "learning_rate": 3.467683369644154e-05, "loss": 0.0351, "step": 10350 }, { "epoch": 7.52, "grad_norm": 0.840969443321228, "learning_rate": 3.465666101831679e-05, "loss": 0.0416, "step": 10360 }, { "epoch": 7.53, "grad_norm": 0.04065997898578644, "learning_rate": 3.463648834019205e-05, "loss": 0.0251, "step": 10370 }, { "epoch": 7.54, "grad_norm": 0.03526819124817848, "learning_rate": 3.46163156620673e-05, "loss": 0.0134, "step": 10380 }, { "epoch": 7.54, "grad_norm": 2.399366855621338, "learning_rate": 3.459614298394255e-05, "loss": 0.0036, "step": 10390 }, { "epoch": 7.55, "grad_norm": 0.23753774166107178, "learning_rate": 3.4575970305817804e-05, "loss": 0.0066, "step": 10400 }, { "epoch": 7.56, "grad_norm": 0.16471104323863983, "learning_rate": 3.455579762769305e-05, "loss": 0.0039, "step": 10410 }, { "epoch": 7.56, "grad_norm": 0.004009116906672716, "learning_rate": 3.4535624949568306e-05, "loss": 0.006, "step": 10420 }, { "epoch": 7.57, "grad_norm": 0.005728223826736212, "learning_rate": 3.451545227144356e-05, "loss": 0.0044, "step": 10430 }, { "epoch": 7.58, "grad_norm": 0.005147533491253853, "learning_rate": 3.4495279593318815e-05, "loss": 0.0068, "step": 10440 }, { "epoch": 7.59, "grad_norm": 0.09112564474344254, "learning_rate": 3.447510691519406e-05, "loss": 0.0226, "step": 10450 }, { "epoch": 7.59, "grad_norm": 0.001015618909150362, "learning_rate": 3.445493423706932e-05, "loss": 0.0263, "step": 10460 }, { "epoch": 7.6, "grad_norm": 0.006133379880338907, "learning_rate": 3.4434761558944564e-05, "loss": 0.0196, "step": 10470 }, { "epoch": 7.61, "grad_norm": 7.626928329467773, "learning_rate": 3.441458888081982e-05, "loss": 0.0122, "step": 10480 }, { "epoch": 7.62, "grad_norm": 23.610570907592773, "learning_rate": 3.439441620269507e-05, "loss": 0.0201, "step": 10490 }, { "epoch": 7.62, "grad_norm": 2.0458810329437256, "learning_rate": 3.437424352457032e-05, "loss": 0.0054, "step": 10500 }, { "epoch": 7.63, "grad_norm": 4.983373641967773, "learning_rate": 3.4354070846445576e-05, "loss": 0.021, "step": 10510 }, { "epoch": 7.64, "grad_norm": 10.006043434143066, "learning_rate": 3.433389816832083e-05, "loss": 0.0497, "step": 10520 }, { "epoch": 7.64, "grad_norm": 0.06582402437925339, "learning_rate": 3.431372549019608e-05, "loss": 0.0136, "step": 10530 }, { "epoch": 7.65, "grad_norm": 0.01676173321902752, "learning_rate": 3.429355281207133e-05, "loss": 0.0308, "step": 10540 }, { "epoch": 7.66, "grad_norm": 0.03430357575416565, "learning_rate": 3.427338013394659e-05, "loss": 0.0029, "step": 10550 }, { "epoch": 7.67, "grad_norm": 0.19467763602733612, "learning_rate": 3.4253207455821834e-05, "loss": 0.0191, "step": 10560 }, { "epoch": 7.67, "grad_norm": 0.07039833068847656, "learning_rate": 3.423303477769709e-05, "loss": 0.008, "step": 10570 }, { "epoch": 7.68, "grad_norm": 0.030894028022885323, "learning_rate": 3.421286209957234e-05, "loss": 0.0082, "step": 10580 }, { "epoch": 7.69, "grad_norm": 0.009277657605707645, "learning_rate": 3.419268942144759e-05, "loss": 0.0032, "step": 10590 }, { "epoch": 7.7, "grad_norm": 0.03260354697704315, "learning_rate": 3.4172516743322845e-05, "loss": 0.0145, "step": 10600 }, { "epoch": 7.7, "grad_norm": 0.033388249576091766, "learning_rate": 3.41523440651981e-05, "loss": 0.0006, "step": 10610 }, { "epoch": 7.71, "grad_norm": 0.0583641491830349, "learning_rate": 3.413217138707335e-05, "loss": 0.0084, "step": 10620 }, { "epoch": 7.72, "grad_norm": 0.005505802109837532, "learning_rate": 3.41119987089486e-05, "loss": 0.0125, "step": 10630 }, { "epoch": 7.72, "grad_norm": 25.258424758911133, "learning_rate": 3.4091826030823856e-05, "loss": 0.0247, "step": 10640 }, { "epoch": 7.73, "grad_norm": 0.10212866216897964, "learning_rate": 3.4071653352699104e-05, "loss": 0.0076, "step": 10650 }, { "epoch": 7.74, "grad_norm": 0.07812552899122238, "learning_rate": 3.405148067457436e-05, "loss": 0.0088, "step": 10660 }, { "epoch": 7.75, "grad_norm": 0.017161810770630836, "learning_rate": 3.403130799644961e-05, "loss": 0.0131, "step": 10670 }, { "epoch": 7.75, "grad_norm": 0.01328431349247694, "learning_rate": 3.401113531832486e-05, "loss": 0.0102, "step": 10680 }, { "epoch": 7.76, "grad_norm": 0.005470677278935909, "learning_rate": 3.3990962640200115e-05, "loss": 0.001, "step": 10690 }, { "epoch": 7.77, "grad_norm": 0.23102551698684692, "learning_rate": 3.397078996207537e-05, "loss": 0.024, "step": 10700 }, { "epoch": 7.77, "grad_norm": 1.5232537984848022, "learning_rate": 3.395061728395062e-05, "loss": 0.0082, "step": 10710 }, { "epoch": 7.78, "grad_norm": 0.37568143010139465, "learning_rate": 3.393044460582587e-05, "loss": 0.007, "step": 10720 }, { "epoch": 7.79, "grad_norm": 0.005128229968249798, "learning_rate": 3.391027192770112e-05, "loss": 0.0153, "step": 10730 }, { "epoch": 7.8, "grad_norm": 1.4863814115524292, "learning_rate": 3.3890099249576374e-05, "loss": 0.0072, "step": 10740 }, { "epoch": 7.8, "grad_norm": 0.0014930274337530136, "learning_rate": 3.386992657145163e-05, "loss": 0.0154, "step": 10750 }, { "epoch": 7.81, "grad_norm": 11.875150680541992, "learning_rate": 3.384975389332688e-05, "loss": 0.0089, "step": 10760 }, { "epoch": 7.82, "grad_norm": 0.04604551941156387, "learning_rate": 3.382958121520213e-05, "loss": 0.0028, "step": 10770 }, { "epoch": 7.83, "grad_norm": 1.3366645574569702, "learning_rate": 3.3809408537077385e-05, "loss": 0.015, "step": 10780 }, { "epoch": 7.83, "grad_norm": 12.528473854064941, "learning_rate": 3.378923585895263e-05, "loss": 0.0173, "step": 10790 }, { "epoch": 7.84, "grad_norm": 0.0017001566011458635, "learning_rate": 3.376906318082789e-05, "loss": 0.0087, "step": 10800 }, { "epoch": 7.85, "grad_norm": 0.09803108125925064, "learning_rate": 3.374889050270314e-05, "loss": 0.0018, "step": 10810 }, { "epoch": 7.85, "grad_norm": 0.22022363543510437, "learning_rate": 3.3728717824578396e-05, "loss": 0.0147, "step": 10820 }, { "epoch": 7.86, "grad_norm": 0.052362002432346344, "learning_rate": 3.3708545146453644e-05, "loss": 0.0129, "step": 10830 }, { "epoch": 7.87, "grad_norm": 0.003308740444481373, "learning_rate": 3.36883724683289e-05, "loss": 0.013, "step": 10840 }, { "epoch": 7.88, "grad_norm": 0.019734688103199005, "learning_rate": 3.3668199790204146e-05, "loss": 0.0055, "step": 10850 }, { "epoch": 7.88, "grad_norm": 4.014479160308838, "learning_rate": 3.36480271120794e-05, "loss": 0.0038, "step": 10860 }, { "epoch": 7.89, "grad_norm": 0.002026587026193738, "learning_rate": 3.3627854433954655e-05, "loss": 0.0242, "step": 10870 }, { "epoch": 7.9, "grad_norm": 1.0237423181533813, "learning_rate": 3.360768175582991e-05, "loss": 0.0299, "step": 10880 }, { "epoch": 7.91, "grad_norm": 0.0015221534995362163, "learning_rate": 3.358750907770516e-05, "loss": 0.0091, "step": 10890 }, { "epoch": 7.91, "grad_norm": 0.021977558732032776, "learning_rate": 3.356733639958041e-05, "loss": 0.0288, "step": 10900 }, { "epoch": 7.92, "grad_norm": 12.71220588684082, "learning_rate": 3.354716372145566e-05, "loss": 0.0345, "step": 10910 }, { "epoch": 7.93, "grad_norm": 0.050337791442871094, "learning_rate": 3.3526991043330914e-05, "loss": 0.0022, "step": 10920 }, { "epoch": 7.93, "grad_norm": 11.188690185546875, "learning_rate": 3.350681836520617e-05, "loss": 0.0095, "step": 10930 }, { "epoch": 7.94, "grad_norm": 0.6816732883453369, "learning_rate": 3.348664568708142e-05, "loss": 0.0196, "step": 10940 }, { "epoch": 7.95, "grad_norm": 2.0615453720092773, "learning_rate": 3.346647300895667e-05, "loss": 0.0081, "step": 10950 }, { "epoch": 7.96, "grad_norm": 6.260568618774414, "learning_rate": 3.3446300330831925e-05, "loss": 0.0155, "step": 10960 }, { "epoch": 7.96, "grad_norm": 0.013660675846040249, "learning_rate": 3.342612765270717e-05, "loss": 0.0061, "step": 10970 }, { "epoch": 7.97, "grad_norm": 16.132661819458008, "learning_rate": 3.340595497458243e-05, "loss": 0.0249, "step": 10980 }, { "epoch": 7.98, "grad_norm": 0.0729353055357933, "learning_rate": 3.338578229645768e-05, "loss": 0.0021, "step": 10990 }, { "epoch": 7.99, "grad_norm": 2.324553966522217, "learning_rate": 3.336560961833293e-05, "loss": 0.0111, "step": 11000 }, { "epoch": 7.99, "grad_norm": 0.08047934621572495, "learning_rate": 3.3345436940208184e-05, "loss": 0.0029, "step": 11010 }, { "epoch": 8.0, "grad_norm": 0.0027567476499825716, "learning_rate": 3.332526426208343e-05, "loss": 0.0054, "step": 11020 }, { "epoch": 8.0, "eval_accuracy": 0.997799155964968, "eval_f1": 0.9949188056574123, "eval_loss": 0.005811151582747698, "eval_precision": 0.9983704793944491, "eval_recall": 0.9914909166840676, "eval_roc_auc": 0.9999673239643861, "eval_runtime": 387.519, "eval_samples_per_second": 227.468, "eval_steps_per_second": 14.219, "step": 11020 }, { "epoch": 8.01, "grad_norm": 0.16821260750293732, "learning_rate": 3.3305091583958686e-05, "loss": 0.0269, "step": 11030 }, { "epoch": 8.01, "grad_norm": 1.4832972288131714, "learning_rate": 3.328491890583394e-05, "loss": 0.0073, "step": 11040 }, { "epoch": 8.02, "grad_norm": 2.289891481399536, "learning_rate": 3.3264746227709195e-05, "loss": 0.0065, "step": 11050 }, { "epoch": 8.03, "grad_norm": 0.000776061206124723, "learning_rate": 3.324457354958444e-05, "loss": 0.0117, "step": 11060 }, { "epoch": 8.04, "grad_norm": 0.016198599711060524, "learning_rate": 3.32244008714597e-05, "loss": 0.0202, "step": 11070 }, { "epoch": 8.04, "grad_norm": 2.553449869155884, "learning_rate": 3.3204228193334945e-05, "loss": 0.0037, "step": 11080 }, { "epoch": 8.05, "grad_norm": 0.055435068905353546, "learning_rate": 3.31840555152102e-05, "loss": 0.0372, "step": 11090 }, { "epoch": 8.06, "grad_norm": 13.744915008544922, "learning_rate": 3.3163882837085453e-05, "loss": 0.031, "step": 11100 }, { "epoch": 8.07, "grad_norm": 42.31868362426758, "learning_rate": 3.314371015896071e-05, "loss": 0.0065, "step": 11110 }, { "epoch": 8.07, "grad_norm": 0.022761182859539986, "learning_rate": 3.3123537480835956e-05, "loss": 0.0048, "step": 11120 }, { "epoch": 8.08, "grad_norm": 0.06478892266750336, "learning_rate": 3.310336480271121e-05, "loss": 0.0137, "step": 11130 }, { "epoch": 8.09, "grad_norm": 3.4087636470794678, "learning_rate": 3.308319212458646e-05, "loss": 0.0112, "step": 11140 }, { "epoch": 8.09, "grad_norm": 0.024201085790991783, "learning_rate": 3.306301944646171e-05, "loss": 0.0251, "step": 11150 }, { "epoch": 8.1, "grad_norm": 0.003428951371461153, "learning_rate": 3.304284676833697e-05, "loss": 0.0184, "step": 11160 }, { "epoch": 8.11, "grad_norm": 0.10663584619760513, "learning_rate": 3.302267409021222e-05, "loss": 0.019, "step": 11170 }, { "epoch": 8.12, "grad_norm": 0.07480733841657639, "learning_rate": 3.300250141208747e-05, "loss": 0.0172, "step": 11180 }, { "epoch": 8.12, "grad_norm": 0.0026008952409029007, "learning_rate": 3.298232873396272e-05, "loss": 0.0243, "step": 11190 }, { "epoch": 8.13, "grad_norm": 30.776805877685547, "learning_rate": 3.296215605583797e-05, "loss": 0.0103, "step": 11200 }, { "epoch": 8.14, "grad_norm": 0.0013422481715679169, "learning_rate": 3.2941983377713225e-05, "loss": 0.0033, "step": 11210 }, { "epoch": 8.15, "grad_norm": 0.008392914198338985, "learning_rate": 3.292181069958848e-05, "loss": 0.0094, "step": 11220 }, { "epoch": 8.15, "grad_norm": 0.03945827856659889, "learning_rate": 3.2901638021463734e-05, "loss": 0.0032, "step": 11230 }, { "epoch": 8.16, "grad_norm": 0.002708829240873456, "learning_rate": 3.288146534333898e-05, "loss": 0.0006, "step": 11240 }, { "epoch": 8.17, "grad_norm": 0.02194291725754738, "learning_rate": 3.286129266521423e-05, "loss": 0.0101, "step": 11250 }, { "epoch": 8.17, "grad_norm": 5.216897487640381, "learning_rate": 3.2841119987089484e-05, "loss": 0.0136, "step": 11260 }, { "epoch": 8.18, "grad_norm": 0.004165074788033962, "learning_rate": 3.282094730896474e-05, "loss": 0.0101, "step": 11270 }, { "epoch": 8.19, "grad_norm": 1.777703881263733, "learning_rate": 3.280077463083999e-05, "loss": 0.0139, "step": 11280 }, { "epoch": 8.2, "grad_norm": 0.010176170617341995, "learning_rate": 3.278060195271525e-05, "loss": 0.0141, "step": 11290 }, { "epoch": 8.2, "grad_norm": 0.01507497113198042, "learning_rate": 3.2760429274590495e-05, "loss": 0.0156, "step": 11300 }, { "epoch": 8.21, "grad_norm": 0.09720058739185333, "learning_rate": 3.274025659646574e-05, "loss": 0.0088, "step": 11310 }, { "epoch": 8.22, "grad_norm": 3.414900302886963, "learning_rate": 3.2720083918341e-05, "loss": 0.023, "step": 11320 }, { "epoch": 8.23, "grad_norm": 0.5592748522758484, "learning_rate": 3.269991124021625e-05, "loss": 0.0056, "step": 11330 }, { "epoch": 8.23, "grad_norm": 16.837783813476562, "learning_rate": 3.2679738562091506e-05, "loss": 0.0319, "step": 11340 }, { "epoch": 8.24, "grad_norm": 0.012670857831835747, "learning_rate": 3.265956588396676e-05, "loss": 0.0026, "step": 11350 }, { "epoch": 8.25, "grad_norm": 0.002103676088154316, "learning_rate": 3.263939320584201e-05, "loss": 0.0131, "step": 11360 }, { "epoch": 8.25, "grad_norm": 0.10566865652799606, "learning_rate": 3.2619220527717256e-05, "loss": 0.0063, "step": 11370 }, { "epoch": 8.26, "grad_norm": 27.73931121826172, "learning_rate": 3.259904784959251e-05, "loss": 0.0048, "step": 11380 }, { "epoch": 8.27, "grad_norm": 0.009989196434617043, "learning_rate": 3.2578875171467765e-05, "loss": 0.0119, "step": 11390 }, { "epoch": 8.28, "grad_norm": 0.07179460674524307, "learning_rate": 3.255870249334302e-05, "loss": 0.0158, "step": 11400 }, { "epoch": 8.28, "grad_norm": 0.017826635390520096, "learning_rate": 3.2538529815218274e-05, "loss": 0.014, "step": 11410 }, { "epoch": 8.29, "grad_norm": 0.00853447150439024, "learning_rate": 3.251835713709352e-05, "loss": 0.0163, "step": 11420 }, { "epoch": 8.3, "grad_norm": 0.39563441276550293, "learning_rate": 3.249818445896877e-05, "loss": 0.0111, "step": 11430 }, { "epoch": 8.3, "grad_norm": 0.03939186409115791, "learning_rate": 3.2478011780844024e-05, "loss": 0.0224, "step": 11440 }, { "epoch": 8.31, "grad_norm": 0.8431787490844727, "learning_rate": 3.245783910271928e-05, "loss": 0.0266, "step": 11450 }, { "epoch": 8.32, "grad_norm": 0.07643716782331467, "learning_rate": 3.243766642459453e-05, "loss": 0.0124, "step": 11460 }, { "epoch": 8.33, "grad_norm": 0.042288776487112045, "learning_rate": 3.241749374646979e-05, "loss": 0.0184, "step": 11470 }, { "epoch": 8.33, "grad_norm": 1.1074130535125732, "learning_rate": 3.2397321068345035e-05, "loss": 0.0248, "step": 11480 }, { "epoch": 8.34, "grad_norm": 0.23461972177028656, "learning_rate": 3.237714839022028e-05, "loss": 0.0118, "step": 11490 }, { "epoch": 8.35, "grad_norm": 0.008372629061341286, "learning_rate": 3.235697571209554e-05, "loss": 0.0153, "step": 11500 }, { "epoch": 8.36, "grad_norm": 0.011258935555815697, "learning_rate": 3.233680303397079e-05, "loss": 0.0105, "step": 11510 }, { "epoch": 8.36, "grad_norm": 0.16763925552368164, "learning_rate": 3.2316630355846046e-05, "loss": 0.0119, "step": 11520 }, { "epoch": 8.37, "grad_norm": 0.014371280558407307, "learning_rate": 3.22964576777213e-05, "loss": 0.0054, "step": 11530 }, { "epoch": 8.38, "grad_norm": 19.372220993041992, "learning_rate": 3.227628499959655e-05, "loss": 0.0063, "step": 11540 }, { "epoch": 8.38, "grad_norm": 6.232364177703857, "learning_rate": 3.2256112321471796e-05, "loss": 0.0048, "step": 11550 }, { "epoch": 8.39, "grad_norm": 0.2103765457868576, "learning_rate": 3.223593964334705e-05, "loss": 0.0084, "step": 11560 }, { "epoch": 8.4, "grad_norm": 0.4541609585285187, "learning_rate": 3.2215766965222305e-05, "loss": 0.0097, "step": 11570 }, { "epoch": 8.41, "grad_norm": 0.05270016938447952, "learning_rate": 3.219559428709756e-05, "loss": 0.0023, "step": 11580 }, { "epoch": 8.41, "grad_norm": 0.0025290350895375013, "learning_rate": 3.2175421608972814e-05, "loss": 0.0154, "step": 11590 }, { "epoch": 8.42, "grad_norm": 0.0017237365245819092, "learning_rate": 3.215524893084806e-05, "loss": 0.0301, "step": 11600 }, { "epoch": 8.43, "grad_norm": 0.15763328969478607, "learning_rate": 3.213507625272331e-05, "loss": 0.0099, "step": 11610 }, { "epoch": 8.44, "grad_norm": 1.8129161596298218, "learning_rate": 3.2114903574598564e-05, "loss": 0.0114, "step": 11620 }, { "epoch": 8.44, "grad_norm": 0.22685779631137848, "learning_rate": 3.209473089647382e-05, "loss": 0.0089, "step": 11630 }, { "epoch": 8.45, "grad_norm": 0.04477900266647339, "learning_rate": 3.207455821834907e-05, "loss": 0.0156, "step": 11640 }, { "epoch": 8.46, "grad_norm": 0.43626195192337036, "learning_rate": 3.205438554022433e-05, "loss": 0.0023, "step": 11650 }, { "epoch": 8.46, "grad_norm": 19.21158218383789, "learning_rate": 3.2034212862099575e-05, "loss": 0.023, "step": 11660 }, { "epoch": 8.47, "grad_norm": 1.7743921279907227, "learning_rate": 3.201404018397482e-05, "loss": 0.0072, "step": 11670 }, { "epoch": 8.48, "grad_norm": 0.4966103434562683, "learning_rate": 3.199386750585008e-05, "loss": 0.0053, "step": 11680 }, { "epoch": 8.49, "grad_norm": 0.0037831587251275778, "learning_rate": 3.197369482772533e-05, "loss": 0.015, "step": 11690 }, { "epoch": 8.49, "grad_norm": 28.9018611907959, "learning_rate": 3.1953522149600586e-05, "loss": 0.0163, "step": 11700 }, { "epoch": 8.5, "grad_norm": 0.0005793919553980231, "learning_rate": 3.1933349471475834e-05, "loss": 0.0044, "step": 11710 }, { "epoch": 8.51, "grad_norm": 24.365251541137695, "learning_rate": 3.191317679335109e-05, "loss": 0.0094, "step": 11720 }, { "epoch": 8.52, "grad_norm": 0.0010113256284967065, "learning_rate": 3.1893004115226336e-05, "loss": 0.0197, "step": 11730 }, { "epoch": 8.52, "grad_norm": 0.08384249359369278, "learning_rate": 3.187283143710159e-05, "loss": 0.0065, "step": 11740 }, { "epoch": 8.53, "grad_norm": 0.10842813551425934, "learning_rate": 3.1852658758976845e-05, "loss": 0.003, "step": 11750 }, { "epoch": 8.54, "grad_norm": 1.8701107501983643, "learning_rate": 3.18324860808521e-05, "loss": 0.0206, "step": 11760 }, { "epoch": 8.54, "grad_norm": 7.070690631866455, "learning_rate": 3.181231340272735e-05, "loss": 0.0176, "step": 11770 }, { "epoch": 8.55, "grad_norm": 0.00601642020046711, "learning_rate": 3.17921407246026e-05, "loss": 0.0218, "step": 11780 }, { "epoch": 8.56, "grad_norm": 2.2268316745758057, "learning_rate": 3.177196804647785e-05, "loss": 0.0018, "step": 11790 }, { "epoch": 8.57, "grad_norm": 6.753377437591553, "learning_rate": 3.1751795368353103e-05, "loss": 0.0041, "step": 11800 }, { "epoch": 8.57, "grad_norm": 9.824979782104492, "learning_rate": 3.173162269022836e-05, "loss": 0.0103, "step": 11810 }, { "epoch": 8.58, "grad_norm": 0.2134261578321457, "learning_rate": 3.171145001210361e-05, "loss": 0.0044, "step": 11820 }, { "epoch": 8.59, "grad_norm": 0.22093412280082703, "learning_rate": 3.169127733397886e-05, "loss": 0.0092, "step": 11830 }, { "epoch": 8.6, "grad_norm": 4.783883571624756, "learning_rate": 3.1671104655854115e-05, "loss": 0.0188, "step": 11840 }, { "epoch": 8.6, "grad_norm": 0.42982345819473267, "learning_rate": 3.165093197772936e-05, "loss": 0.0481, "step": 11850 }, { "epoch": 8.61, "grad_norm": 23.18255043029785, "learning_rate": 3.163075929960462e-05, "loss": 0.0046, "step": 11860 }, { "epoch": 8.62, "grad_norm": 6.138206481933594, "learning_rate": 3.161058662147987e-05, "loss": 0.018, "step": 11870 }, { "epoch": 8.62, "grad_norm": 0.3428427278995514, "learning_rate": 3.1590413943355126e-05, "loss": 0.0143, "step": 11880 }, { "epoch": 8.63, "grad_norm": 0.7229692339897156, "learning_rate": 3.157024126523037e-05, "loss": 0.0054, "step": 11890 }, { "epoch": 8.64, "grad_norm": 0.011998855508863926, "learning_rate": 3.155006858710563e-05, "loss": 0.009, "step": 11900 }, { "epoch": 8.65, "grad_norm": 0.0009840866550803185, "learning_rate": 3.1529895908980875e-05, "loss": 0.0202, "step": 11910 }, { "epoch": 8.65, "grad_norm": 0.0013845226494595408, "learning_rate": 3.150972323085613e-05, "loss": 0.0088, "step": 11920 }, { "epoch": 8.66, "grad_norm": 14.614180564880371, "learning_rate": 3.1489550552731384e-05, "loss": 0.0148, "step": 11930 }, { "epoch": 8.67, "grad_norm": 27.177169799804688, "learning_rate": 3.146937787460664e-05, "loss": 0.0041, "step": 11940 }, { "epoch": 8.68, "grad_norm": 0.06443148106336594, "learning_rate": 3.1449205196481887e-05, "loss": 0.0041, "step": 11950 }, { "epoch": 8.68, "grad_norm": 0.003651421284303069, "learning_rate": 3.142903251835714e-05, "loss": 0.0079, "step": 11960 }, { "epoch": 8.69, "grad_norm": 0.004412221256643534, "learning_rate": 3.140885984023239e-05, "loss": 0.0093, "step": 11970 }, { "epoch": 8.7, "grad_norm": 0.5715083479881287, "learning_rate": 3.138868716210764e-05, "loss": 0.0066, "step": 11980 }, { "epoch": 8.7, "grad_norm": 0.03544891998171806, "learning_rate": 3.13685144839829e-05, "loss": 0.0164, "step": 11990 }, { "epoch": 8.71, "grad_norm": 0.0518750362098217, "learning_rate": 3.1348341805858145e-05, "loss": 0.0086, "step": 12000 }, { "epoch": 8.72, "grad_norm": 1.9587420225143433, "learning_rate": 3.13281691277334e-05, "loss": 0.0286, "step": 12010 }, { "epoch": 8.73, "grad_norm": 0.004830994643270969, "learning_rate": 3.1307996449608654e-05, "loss": 0.0157, "step": 12020 }, { "epoch": 8.73, "grad_norm": 0.05058489367365837, "learning_rate": 3.12878237714839e-05, "loss": 0.0061, "step": 12030 }, { "epoch": 8.74, "grad_norm": 0.13220369815826416, "learning_rate": 3.1267651093359156e-05, "loss": 0.0234, "step": 12040 }, { "epoch": 8.75, "grad_norm": 0.6112781763076782, "learning_rate": 3.124747841523441e-05, "loss": 0.0067, "step": 12050 }, { "epoch": 8.75, "grad_norm": 0.3486891984939575, "learning_rate": 3.122730573710966e-05, "loss": 0.0012, "step": 12060 }, { "epoch": 8.76, "grad_norm": 0.04058101028203964, "learning_rate": 3.120713305898491e-05, "loss": 0.0242, "step": 12070 }, { "epoch": 8.77, "grad_norm": 0.05213288590312004, "learning_rate": 3.118696038086016e-05, "loss": 0.0286, "step": 12080 }, { "epoch": 8.78, "grad_norm": 0.053065940737724304, "learning_rate": 3.1166787702735415e-05, "loss": 0.0022, "step": 12090 }, { "epoch": 8.78, "grad_norm": 10.85990047454834, "learning_rate": 3.114661502461067e-05, "loss": 0.0105, "step": 12100 }, { "epoch": 8.79, "grad_norm": 0.009069069288671017, "learning_rate": 3.1126442346485924e-05, "loss": 0.0024, "step": 12110 }, { "epoch": 8.8, "grad_norm": 0.0020809718407690525, "learning_rate": 3.110626966836117e-05, "loss": 0.0047, "step": 12120 }, { "epoch": 8.81, "grad_norm": 0.2555220425128937, "learning_rate": 3.1086096990236426e-05, "loss": 0.0118, "step": 12130 }, { "epoch": 8.81, "grad_norm": 0.05784722417593002, "learning_rate": 3.1065924312111674e-05, "loss": 0.0023, "step": 12140 }, { "epoch": 8.82, "grad_norm": 0.035568490624427795, "learning_rate": 3.104575163398693e-05, "loss": 0.0307, "step": 12150 }, { "epoch": 8.83, "grad_norm": 0.08019623160362244, "learning_rate": 3.102557895586218e-05, "loss": 0.017, "step": 12160 }, { "epoch": 8.83, "grad_norm": 5.698986530303955, "learning_rate": 3.100540627773744e-05, "loss": 0.0115, "step": 12170 }, { "epoch": 8.84, "grad_norm": 0.02579701505601406, "learning_rate": 3.0985233599612685e-05, "loss": 0.0105, "step": 12180 }, { "epoch": 8.85, "grad_norm": 0.0029477495700120926, "learning_rate": 3.096506092148794e-05, "loss": 0.0027, "step": 12190 }, { "epoch": 8.86, "grad_norm": 0.09123173356056213, "learning_rate": 3.094488824336319e-05, "loss": 0.0118, "step": 12200 }, { "epoch": 8.86, "grad_norm": 0.13705745339393616, "learning_rate": 3.092471556523844e-05, "loss": 0.0191, "step": 12210 }, { "epoch": 8.87, "grad_norm": 0.0693451315164566, "learning_rate": 3.0904542887113696e-05, "loss": 0.0141, "step": 12220 }, { "epoch": 8.88, "grad_norm": 9.002408027648926, "learning_rate": 3.0884370208988944e-05, "loss": 0.014, "step": 12230 }, { "epoch": 8.89, "grad_norm": 0.02591705694794655, "learning_rate": 3.08641975308642e-05, "loss": 0.0028, "step": 12240 }, { "epoch": 8.89, "grad_norm": 0.47011977434158325, "learning_rate": 3.084402485273945e-05, "loss": 0.0043, "step": 12250 }, { "epoch": 8.9, "grad_norm": 0.09690820425748825, "learning_rate": 3.08238521746147e-05, "loss": 0.0018, "step": 12260 }, { "epoch": 8.91, "grad_norm": 0.21148131787776947, "learning_rate": 3.0803679496489955e-05, "loss": 0.0007, "step": 12270 }, { "epoch": 8.91, "grad_norm": 0.0025974249001592398, "learning_rate": 3.078350681836521e-05, "loss": 0.0174, "step": 12280 }, { "epoch": 8.92, "grad_norm": 0.03931077942252159, "learning_rate": 3.076333414024046e-05, "loss": 0.0039, "step": 12290 }, { "epoch": 8.93, "grad_norm": 7.300892353057861, "learning_rate": 3.074316146211571e-05, "loss": 0.008, "step": 12300 }, { "epoch": 8.94, "grad_norm": 0.007567646913230419, "learning_rate": 3.0722988783990966e-05, "loss": 0.0123, "step": 12310 }, { "epoch": 8.94, "grad_norm": 38.95884323120117, "learning_rate": 3.0702816105866214e-05, "loss": 0.0059, "step": 12320 }, { "epoch": 8.95, "grad_norm": 0.0021059729624539614, "learning_rate": 3.068264342774147e-05, "loss": 0.0362, "step": 12330 }, { "epoch": 8.96, "grad_norm": 0.04417746886610985, "learning_rate": 3.066247074961672e-05, "loss": 0.0035, "step": 12340 }, { "epoch": 8.97, "grad_norm": 0.7246804237365723, "learning_rate": 3.064229807149197e-05, "loss": 0.0395, "step": 12350 }, { "epoch": 8.97, "grad_norm": 3.860435724258423, "learning_rate": 3.0622125393367225e-05, "loss": 0.0045, "step": 12360 }, { "epoch": 8.98, "grad_norm": 0.16252577304840088, "learning_rate": 3.060195271524248e-05, "loss": 0.0015, "step": 12370 }, { "epoch": 8.99, "grad_norm": 4.022401332855225, "learning_rate": 3.058178003711773e-05, "loss": 0.0086, "step": 12380 }, { "epoch": 8.99, "grad_norm": 0.0879194512963295, "learning_rate": 3.056160735899298e-05, "loss": 0.0237, "step": 12390 }, { "epoch": 9.0, "eval_accuracy": 0.9975495757135726, "eval_f1": 0.994333980378784, "eval_loss": 0.006345591973513365, "eval_precision": 0.9993145629020352, "eval_recall": 0.9894027980789308, "eval_roc_auc": 0.9999658938912749, "eval_runtime": 387.414, "eval_samples_per_second": 227.529, "eval_steps_per_second": 14.223, "step": 12397 }, { "epoch": 9.0, "grad_norm": 0.1917732059955597, "learning_rate": 3.0541434680868236e-05, "loss": 0.0101, "step": 12400 }, { "epoch": 9.01, "grad_norm": 0.033312708139419556, "learning_rate": 3.0521262002743484e-05, "loss": 0.0021, "step": 12410 }, { "epoch": 9.02, "grad_norm": 0.021154897287487984, "learning_rate": 3.0501089324618738e-05, "loss": 0.0073, "step": 12420 }, { "epoch": 9.02, "grad_norm": 0.05219145119190216, "learning_rate": 3.048091664649399e-05, "loss": 0.0023, "step": 12430 }, { "epoch": 9.03, "grad_norm": 0.0031368627678602934, "learning_rate": 3.0460743968369244e-05, "loss": 0.0215, "step": 12440 }, { "epoch": 9.04, "grad_norm": 0.0007551907910965383, "learning_rate": 3.0440571290244495e-05, "loss": 0.0236, "step": 12450 }, { "epoch": 9.05, "grad_norm": 0.19673331081867218, "learning_rate": 3.0420398612119742e-05, "loss": 0.0061, "step": 12460 }, { "epoch": 9.05, "grad_norm": 0.003755633719265461, "learning_rate": 3.0400225933994997e-05, "loss": 0.0052, "step": 12470 }, { "epoch": 9.06, "grad_norm": 0.8004059195518494, "learning_rate": 3.038005325587025e-05, "loss": 0.0337, "step": 12480 }, { "epoch": 9.07, "grad_norm": 15.695157051086426, "learning_rate": 3.0359880577745502e-05, "loss": 0.0083, "step": 12490 }, { "epoch": 9.07, "grad_norm": 6.738131046295166, "learning_rate": 3.0339707899620757e-05, "loss": 0.0119, "step": 12500 }, { "epoch": 9.08, "grad_norm": 0.03489381819963455, "learning_rate": 3.0319535221496008e-05, "loss": 0.0037, "step": 12510 }, { "epoch": 9.09, "grad_norm": 0.353909969329834, "learning_rate": 3.0299362543371256e-05, "loss": 0.0159, "step": 12520 }, { "epoch": 9.1, "grad_norm": 0.3446331024169922, "learning_rate": 3.027918986524651e-05, "loss": 0.0058, "step": 12530 }, { "epoch": 9.1, "grad_norm": 1.5575968027114868, "learning_rate": 3.0259017187121764e-05, "loss": 0.0184, "step": 12540 }, { "epoch": 9.11, "grad_norm": 0.026695841923356056, "learning_rate": 3.0238844508997016e-05, "loss": 0.0014, "step": 12550 }, { "epoch": 9.12, "grad_norm": 0.006738999392837286, "learning_rate": 3.021867183087227e-05, "loss": 0.0122, "step": 12560 }, { "epoch": 9.13, "grad_norm": 1.860974907875061, "learning_rate": 3.019849915274752e-05, "loss": 0.0052, "step": 12570 }, { "epoch": 9.13, "grad_norm": 0.29735755920410156, "learning_rate": 3.017832647462277e-05, "loss": 0.0071, "step": 12580 }, { "epoch": 9.14, "grad_norm": 0.08206071704626083, "learning_rate": 3.0158153796498023e-05, "loss": 0.0148, "step": 12590 }, { "epoch": 9.15, "grad_norm": 1.398136854171753, "learning_rate": 3.0137981118373278e-05, "loss": 0.0049, "step": 12600 }, { "epoch": 9.15, "grad_norm": 0.42362233996391296, "learning_rate": 3.011780844024853e-05, "loss": 0.0098, "step": 12610 }, { "epoch": 9.16, "grad_norm": 0.08972881734371185, "learning_rate": 3.0097635762123783e-05, "loss": 0.0269, "step": 12620 }, { "epoch": 9.17, "grad_norm": 6.95635461807251, "learning_rate": 3.0077463083999034e-05, "loss": 0.0204, "step": 12630 }, { "epoch": 9.18, "grad_norm": 44.25766372680664, "learning_rate": 3.0057290405874282e-05, "loss": 0.0097, "step": 12640 }, { "epoch": 9.18, "grad_norm": 0.01075151190161705, "learning_rate": 3.0037117727749536e-05, "loss": 0.0029, "step": 12650 }, { "epoch": 9.19, "grad_norm": 2.1552395820617676, "learning_rate": 3.001694504962479e-05, "loss": 0.0188, "step": 12660 }, { "epoch": 9.2, "grad_norm": 18.27303123474121, "learning_rate": 2.9996772371500042e-05, "loss": 0.0159, "step": 12670 }, { "epoch": 9.21, "grad_norm": 0.42549946904182434, "learning_rate": 2.9976599693375296e-05, "loss": 0.0112, "step": 12680 }, { "epoch": 9.21, "grad_norm": 0.16241183876991272, "learning_rate": 2.9956427015250548e-05, "loss": 0.0057, "step": 12690 }, { "epoch": 9.22, "grad_norm": 5.789508819580078, "learning_rate": 2.9936254337125795e-05, "loss": 0.0115, "step": 12700 }, { "epoch": 9.23, "grad_norm": 0.16165970265865326, "learning_rate": 2.991608165900105e-05, "loss": 0.0083, "step": 12710 }, { "epoch": 9.23, "grad_norm": 0.010464129038155079, "learning_rate": 2.9895908980876304e-05, "loss": 0.001, "step": 12720 }, { "epoch": 9.24, "grad_norm": 0.06300076097249985, "learning_rate": 2.9875736302751555e-05, "loss": 0.0098, "step": 12730 }, { "epoch": 9.25, "grad_norm": 7.2553324699401855, "learning_rate": 2.985556362462681e-05, "loss": 0.0231, "step": 12740 }, { "epoch": 9.26, "grad_norm": 0.001716342754662037, "learning_rate": 2.9835390946502057e-05, "loss": 0.0113, "step": 12750 }, { "epoch": 9.26, "grad_norm": 3.9360616207122803, "learning_rate": 2.981521826837731e-05, "loss": 0.0246, "step": 12760 }, { "epoch": 9.27, "grad_norm": 0.5541887283325195, "learning_rate": 2.9795045590252563e-05, "loss": 0.0042, "step": 12770 }, { "epoch": 9.28, "grad_norm": 0.0037244977429509163, "learning_rate": 2.9774872912127817e-05, "loss": 0.0046, "step": 12780 }, { "epoch": 9.28, "grad_norm": 0.008533765561878681, "learning_rate": 2.975470023400307e-05, "loss": 0.0028, "step": 12790 }, { "epoch": 9.29, "grad_norm": 0.005289971828460693, "learning_rate": 2.9734527555878323e-05, "loss": 0.0027, "step": 12800 }, { "epoch": 9.3, "grad_norm": 0.0009858094854280353, "learning_rate": 2.971435487775357e-05, "loss": 0.003, "step": 12810 }, { "epoch": 9.31, "grad_norm": 0.00409921258687973, "learning_rate": 2.9694182199628822e-05, "loss": 0.0021, "step": 12820 }, { "epoch": 9.31, "grad_norm": 0.0007549124420620501, "learning_rate": 2.9674009521504076e-05, "loss": 0.0054, "step": 12830 }, { "epoch": 9.32, "grad_norm": 0.10273166000843048, "learning_rate": 2.965383684337933e-05, "loss": 0.0049, "step": 12840 }, { "epoch": 9.33, "grad_norm": 0.1592969447374344, "learning_rate": 2.9633664165254582e-05, "loss": 0.0006, "step": 12850 }, { "epoch": 9.34, "grad_norm": 0.002123448997735977, "learning_rate": 2.9613491487129836e-05, "loss": 0.0016, "step": 12860 }, { "epoch": 9.34, "grad_norm": 5.097808361053467, "learning_rate": 2.9593318809005084e-05, "loss": 0.0085, "step": 12870 }, { "epoch": 9.35, "grad_norm": 0.41327035427093506, "learning_rate": 2.9573146130880335e-05, "loss": 0.0014, "step": 12880 }, { "epoch": 9.36, "grad_norm": 0.0021131192333996296, "learning_rate": 2.955297345275559e-05, "loss": 0.0093, "step": 12890 }, { "epoch": 9.36, "grad_norm": 0.36018940806388855, "learning_rate": 2.953280077463084e-05, "loss": 0.0058, "step": 12900 }, { "epoch": 9.37, "grad_norm": 0.0010047269752249122, "learning_rate": 2.9512628096506095e-05, "loss": 0.0024, "step": 12910 }, { "epoch": 9.38, "grad_norm": 0.13996048271656036, "learning_rate": 2.949245541838135e-05, "loss": 0.0095, "step": 12920 }, { "epoch": 9.39, "grad_norm": 0.0009951372630894184, "learning_rate": 2.9472282740256597e-05, "loss": 0.0017, "step": 12930 }, { "epoch": 9.39, "grad_norm": 0.0007663732394576073, "learning_rate": 2.9452110062131848e-05, "loss": 0.0431, "step": 12940 }, { "epoch": 9.4, "grad_norm": 0.0016060832422226667, "learning_rate": 2.9431937384007103e-05, "loss": 0.0026, "step": 12950 }, { "epoch": 9.41, "grad_norm": 0.09539427608251572, "learning_rate": 2.9411764705882354e-05, "loss": 0.0056, "step": 12960 }, { "epoch": 9.42, "grad_norm": 0.5911301970481873, "learning_rate": 2.9391592027757608e-05, "loss": 0.0062, "step": 12970 }, { "epoch": 9.42, "grad_norm": 0.12249241769313812, "learning_rate": 2.9371419349632856e-05, "loss": 0.0049, "step": 12980 }, { "epoch": 9.43, "grad_norm": 0.05083388090133667, "learning_rate": 2.935124667150811e-05, "loss": 0.0016, "step": 12990 }, { "epoch": 9.44, "grad_norm": 0.004344270098954439, "learning_rate": 2.933107399338336e-05, "loss": 0.01, "step": 13000 }, { "epoch": 9.44, "grad_norm": 0.003572734771296382, "learning_rate": 2.9310901315258616e-05, "loss": 0.0003, "step": 13010 }, { "epoch": 9.45, "grad_norm": 2.558148145675659, "learning_rate": 2.9290728637133867e-05, "loss": 0.0101, "step": 13020 }, { "epoch": 9.46, "grad_norm": 0.03010840341448784, "learning_rate": 2.927055595900912e-05, "loss": 0.0191, "step": 13030 }, { "epoch": 9.47, "grad_norm": 0.05662524327635765, "learning_rate": 2.925038328088437e-05, "loss": 0.0144, "step": 13040 }, { "epoch": 9.47, "grad_norm": 0.00602317601442337, "learning_rate": 2.9230210602759624e-05, "loss": 0.0081, "step": 13050 }, { "epoch": 9.48, "grad_norm": 0.061507437378168106, "learning_rate": 2.9210037924634875e-05, "loss": 0.0129, "step": 13060 }, { "epoch": 9.49, "grad_norm": 0.0022451505064964294, "learning_rate": 2.918986524651013e-05, "loss": 0.0063, "step": 13070 }, { "epoch": 9.5, "grad_norm": 8.346963882446289, "learning_rate": 2.916969256838538e-05, "loss": 0.0078, "step": 13080 }, { "epoch": 9.5, "grad_norm": 0.0015550401294603944, "learning_rate": 2.9149519890260635e-05, "loss": 0.0249, "step": 13090 }, { "epoch": 9.51, "grad_norm": 0.03434576839208603, "learning_rate": 2.9129347212135882e-05, "loss": 0.0215, "step": 13100 }, { "epoch": 9.52, "grad_norm": 24.404510498046875, "learning_rate": 2.9109174534011137e-05, "loss": 0.0174, "step": 13110 }, { "epoch": 9.52, "grad_norm": 0.008947799913585186, "learning_rate": 2.9089001855886388e-05, "loss": 0.0039, "step": 13120 }, { "epoch": 9.53, "grad_norm": 0.039463091641664505, "learning_rate": 2.9068829177761642e-05, "loss": 0.0051, "step": 13130 }, { "epoch": 9.54, "grad_norm": 0.052592430263757706, "learning_rate": 2.9048656499636893e-05, "loss": 0.0052, "step": 13140 }, { "epoch": 9.55, "grad_norm": 0.010807220824062824, "learning_rate": 2.9028483821512148e-05, "loss": 0.0013, "step": 13150 }, { "epoch": 9.55, "grad_norm": 0.003055947832763195, "learning_rate": 2.9008311143387396e-05, "loss": 0.0137, "step": 13160 }, { "epoch": 9.56, "grad_norm": 0.23203665018081665, "learning_rate": 2.898813846526265e-05, "loss": 0.0062, "step": 13170 }, { "epoch": 9.57, "grad_norm": 0.012158505618572235, "learning_rate": 2.89679657871379e-05, "loss": 0.0162, "step": 13180 }, { "epoch": 9.58, "grad_norm": 0.08189103752374649, "learning_rate": 2.8947793109013156e-05, "loss": 0.0207, "step": 13190 }, { "epoch": 9.58, "grad_norm": 12.099898338317871, "learning_rate": 2.8927620430888407e-05, "loss": 0.0071, "step": 13200 }, { "epoch": 9.59, "grad_norm": 0.006215301342308521, "learning_rate": 2.8907447752763654e-05, "loss": 0.008, "step": 13210 }, { "epoch": 9.6, "grad_norm": 0.14043578505516052, "learning_rate": 2.888727507463891e-05, "loss": 0.005, "step": 13220 }, { "epoch": 9.6, "grad_norm": 0.04191066324710846, "learning_rate": 2.8867102396514163e-05, "loss": 0.0046, "step": 13230 }, { "epoch": 9.61, "grad_norm": 0.051841963082551956, "learning_rate": 2.8846929718389414e-05, "loss": 0.0039, "step": 13240 }, { "epoch": 9.62, "grad_norm": 0.01983380690217018, "learning_rate": 2.882675704026467e-05, "loss": 0.0218, "step": 13250 }, { "epoch": 9.63, "grad_norm": 0.001248432556167245, "learning_rate": 2.880658436213992e-05, "loss": 0.0089, "step": 13260 }, { "epoch": 9.63, "grad_norm": 0.013916940428316593, "learning_rate": 2.8786411684015168e-05, "loss": 0.0033, "step": 13270 }, { "epoch": 9.64, "grad_norm": 0.06915153563022614, "learning_rate": 2.8766239005890422e-05, "loss": 0.0101, "step": 13280 }, { "epoch": 9.65, "grad_norm": 0.0015154919819906354, "learning_rate": 2.8746066327765677e-05, "loss": 0.0228, "step": 13290 }, { "epoch": 9.66, "grad_norm": 0.0033606714569032192, "learning_rate": 2.8725893649640928e-05, "loss": 0.0106, "step": 13300 }, { "epoch": 9.66, "grad_norm": 0.001192206982523203, "learning_rate": 2.8705720971516182e-05, "loss": 0.0038, "step": 13310 }, { "epoch": 9.67, "grad_norm": 0.37659505009651184, "learning_rate": 2.8685548293391433e-05, "loss": 0.0237, "step": 13320 }, { "epoch": 9.68, "grad_norm": 0.08516032248735428, "learning_rate": 2.866537561526668e-05, "loss": 0.0068, "step": 13330 }, { "epoch": 9.68, "grad_norm": 0.053294021636247635, "learning_rate": 2.8645202937141935e-05, "loss": 0.0104, "step": 13340 }, { "epoch": 9.69, "grad_norm": 0.0005857631331309676, "learning_rate": 2.862503025901719e-05, "loss": 0.027, "step": 13350 }, { "epoch": 9.7, "grad_norm": 0.0786575973033905, "learning_rate": 2.860485758089244e-05, "loss": 0.0324, "step": 13360 }, { "epoch": 9.71, "grad_norm": 0.013472789898514748, "learning_rate": 2.8584684902767695e-05, "loss": 0.0157, "step": 13370 }, { "epoch": 9.71, "grad_norm": 0.027604950591921806, "learning_rate": 2.8564512224642946e-05, "loss": 0.008, "step": 13380 }, { "epoch": 9.72, "grad_norm": 0.16292813420295715, "learning_rate": 2.8544339546518194e-05, "loss": 0.022, "step": 13390 }, { "epoch": 9.73, "grad_norm": 0.2821474075317383, "learning_rate": 2.852416686839345e-05, "loss": 0.0104, "step": 13400 }, { "epoch": 9.74, "grad_norm": 16.801767349243164, "learning_rate": 2.85039941902687e-05, "loss": 0.0101, "step": 13410 }, { "epoch": 9.74, "grad_norm": 0.1447453647851944, "learning_rate": 2.8483821512143954e-05, "loss": 0.007, "step": 13420 }, { "epoch": 9.75, "grad_norm": 0.9015370011329651, "learning_rate": 2.846364883401921e-05, "loss": 0.0026, "step": 13430 }, { "epoch": 9.76, "grad_norm": 0.09575998038053513, "learning_rate": 2.844347615589446e-05, "loss": 0.0046, "step": 13440 }, { "epoch": 9.76, "grad_norm": 0.8580997586250305, "learning_rate": 2.8423303477769707e-05, "loss": 0.0104, "step": 13450 }, { "epoch": 9.77, "grad_norm": 0.0004182391567155719, "learning_rate": 2.8403130799644962e-05, "loss": 0.0054, "step": 13460 }, { "epoch": 9.78, "grad_norm": 0.0007015741430222988, "learning_rate": 2.8382958121520213e-05, "loss": 0.0103, "step": 13470 }, { "epoch": 9.79, "grad_norm": 0.1236376240849495, "learning_rate": 2.8362785443395467e-05, "loss": 0.0066, "step": 13480 }, { "epoch": 9.79, "grad_norm": 0.005294484551995993, "learning_rate": 2.8342612765270722e-05, "loss": 0.0114, "step": 13490 }, { "epoch": 9.8, "grad_norm": 4.20367431640625, "learning_rate": 2.832244008714597e-05, "loss": 0.0082, "step": 13500 }, { "epoch": 9.81, "grad_norm": 11.44412612915039, "learning_rate": 2.830226740902122e-05, "loss": 0.0145, "step": 13510 }, { "epoch": 9.81, "grad_norm": 0.02178305573761463, "learning_rate": 2.8282094730896475e-05, "loss": 0.0038, "step": 13520 }, { "epoch": 9.82, "grad_norm": 0.04771624505519867, "learning_rate": 2.8261922052771726e-05, "loss": 0.0023, "step": 13530 }, { "epoch": 9.83, "grad_norm": 0.006732448935508728, "learning_rate": 2.824174937464698e-05, "loss": 0.0058, "step": 13540 }, { "epoch": 9.84, "grad_norm": 26.35951805114746, "learning_rate": 2.8221576696522235e-05, "loss": 0.0109, "step": 13550 }, { "epoch": 9.84, "grad_norm": 0.12082793563604355, "learning_rate": 2.8201404018397483e-05, "loss": 0.0042, "step": 13560 }, { "epoch": 9.85, "grad_norm": 0.001372415805235505, "learning_rate": 2.8181231340272734e-05, "loss": 0.0047, "step": 13570 }, { "epoch": 9.86, "grad_norm": 0.0015574540011584759, "learning_rate": 2.816105866214799e-05, "loss": 0.0014, "step": 13580 }, { "epoch": 9.87, "grad_norm": 0.002794192172586918, "learning_rate": 2.814088598402324e-05, "loss": 0.0011, "step": 13590 }, { "epoch": 9.87, "grad_norm": 0.0029143195133656263, "learning_rate": 2.8120713305898494e-05, "loss": 0.0044, "step": 13600 }, { "epoch": 9.88, "grad_norm": 20.12303352355957, "learning_rate": 2.810054062777375e-05, "loss": 0.0099, "step": 13610 }, { "epoch": 9.89, "grad_norm": 0.29002559185028076, "learning_rate": 2.8080367949648996e-05, "loss": 0.003, "step": 13620 }, { "epoch": 9.89, "grad_norm": 0.03364582359790802, "learning_rate": 2.8060195271524247e-05, "loss": 0.0071, "step": 13630 }, { "epoch": 9.9, "grad_norm": 5.244564533233643, "learning_rate": 2.80400225933995e-05, "loss": 0.0132, "step": 13640 }, { "epoch": 9.91, "grad_norm": 0.00039370081503875554, "learning_rate": 2.8019849915274753e-05, "loss": 0.0037, "step": 13650 }, { "epoch": 9.92, "grad_norm": 0.006911866366863251, "learning_rate": 2.7999677237150007e-05, "loss": 0.0104, "step": 13660 }, { "epoch": 9.92, "grad_norm": 0.1128559485077858, "learning_rate": 2.797950455902526e-05, "loss": 0.0097, "step": 13670 }, { "epoch": 9.93, "grad_norm": 0.3790498375892639, "learning_rate": 2.795933188090051e-05, "loss": 0.0078, "step": 13680 }, { "epoch": 9.94, "grad_norm": 19.0465087890625, "learning_rate": 2.793915920277576e-05, "loss": 0.004, "step": 13690 }, { "epoch": 9.95, "grad_norm": 0.04134733974933624, "learning_rate": 2.7918986524651015e-05, "loss": 0.0061, "step": 13700 }, { "epoch": 9.95, "grad_norm": 0.8866470456123352, "learning_rate": 2.7898813846526266e-05, "loss": 0.0113, "step": 13710 }, { "epoch": 9.96, "grad_norm": 0.0012800481636077166, "learning_rate": 2.787864116840152e-05, "loss": 0.013, "step": 13720 }, { "epoch": 9.97, "grad_norm": 0.736062228679657, "learning_rate": 2.7858468490276768e-05, "loss": 0.0013, "step": 13730 }, { "epoch": 9.97, "grad_norm": 0.18687765300273895, "learning_rate": 2.7838295812152022e-05, "loss": 0.0049, "step": 13740 }, { "epoch": 9.98, "grad_norm": 9.7325439453125, "learning_rate": 2.7818123134027274e-05, "loss": 0.03, "step": 13750 }, { "epoch": 9.99, "grad_norm": 0.10014975816011429, "learning_rate": 2.7797950455902528e-05, "loss": 0.0207, "step": 13760 }, { "epoch": 10.0, "grad_norm": 0.029998375102877617, "learning_rate": 2.777777777777778e-05, "loss": 0.0088, "step": 13770 }, { "epoch": 10.0, "eval_accuracy": 0.9981508372282979, "eval_f1": 0.9957295187193796, "eval_loss": 0.004181100055575371, "eval_precision": 0.9994740440751065, "eval_recall": 0.9920129463353519, "eval_roc_auc": 0.999982062040609, "eval_runtime": 386.704, "eval_samples_per_second": 227.947, "eval_steps_per_second": 14.249, "step": 13775 }, { "epoch": 10.0, "grad_norm": 0.07064787298440933, "learning_rate": 2.7757605099653034e-05, "loss": 0.0176, "step": 13780 }, { "epoch": 10.01, "grad_norm": 0.0020518777891993523, "learning_rate": 2.773743242152828e-05, "loss": 0.0056, "step": 13790 }, { "epoch": 10.02, "grad_norm": 2.226813316345215, "learning_rate": 2.7717259743403536e-05, "loss": 0.0137, "step": 13800 }, { "epoch": 10.03, "grad_norm": 0.08585159480571747, "learning_rate": 2.7697087065278787e-05, "loss": 0.0164, "step": 13810 }, { "epoch": 10.03, "grad_norm": 0.04419061914086342, "learning_rate": 2.767691438715404e-05, "loss": 0.0016, "step": 13820 }, { "epoch": 10.04, "grad_norm": 0.11019827425479889, "learning_rate": 2.7656741709029292e-05, "loss": 0.0057, "step": 13830 }, { "epoch": 10.05, "grad_norm": 0.004558782558888197, "learning_rate": 2.7636569030904547e-05, "loss": 0.0248, "step": 13840 }, { "epoch": 10.05, "grad_norm": 0.20797798037528992, "learning_rate": 2.7616396352779795e-05, "loss": 0.004, "step": 13850 }, { "epoch": 10.06, "grad_norm": 0.001556594274006784, "learning_rate": 2.759622367465505e-05, "loss": 0.0042, "step": 13860 }, { "epoch": 10.07, "grad_norm": 2.7074778079986572, "learning_rate": 2.75760509965303e-05, "loss": 0.0186, "step": 13870 }, { "epoch": 10.08, "grad_norm": 1.4041087627410889, "learning_rate": 2.7555878318405555e-05, "loss": 0.0321, "step": 13880 }, { "epoch": 10.08, "grad_norm": 0.0077447472140192986, "learning_rate": 2.7535705640280806e-05, "loss": 0.0115, "step": 13890 }, { "epoch": 10.09, "grad_norm": 0.2554437220096588, "learning_rate": 2.751553296215606e-05, "loss": 0.0112, "step": 13900 }, { "epoch": 10.1, "grad_norm": 0.04327264055609703, "learning_rate": 2.7495360284031308e-05, "loss": 0.0042, "step": 13910 }, { "epoch": 10.11, "grad_norm": 0.0026459668297320604, "learning_rate": 2.747518760590656e-05, "loss": 0.0129, "step": 13920 }, { "epoch": 10.11, "grad_norm": 0.0002914105716627091, "learning_rate": 2.7455014927781813e-05, "loss": 0.004, "step": 13930 }, { "epoch": 10.12, "grad_norm": 0.00024171853146981448, "learning_rate": 2.7434842249657068e-05, "loss": 0.0126, "step": 13940 }, { "epoch": 10.13, "grad_norm": 0.014345817267894745, "learning_rate": 2.741466957153232e-05, "loss": 0.0181, "step": 13950 }, { "epoch": 10.13, "grad_norm": 0.11584875732660294, "learning_rate": 2.7394496893407567e-05, "loss": 0.026, "step": 13960 }, { "epoch": 10.14, "grad_norm": 2.401568651199341, "learning_rate": 2.737432421528282e-05, "loss": 0.0141, "step": 13970 }, { "epoch": 10.15, "grad_norm": 0.01768440753221512, "learning_rate": 2.7354151537158072e-05, "loss": 0.018, "step": 13980 }, { "epoch": 10.16, "grad_norm": 0.038432709872722626, "learning_rate": 2.7333978859033327e-05, "loss": 0.0143, "step": 13990 }, { "epoch": 10.16, "grad_norm": 20.604793548583984, "learning_rate": 2.731380618090858e-05, "loss": 0.029, "step": 14000 }, { "epoch": 10.17, "grad_norm": 0.26159873604774475, "learning_rate": 2.7293633502783832e-05, "loss": 0.0161, "step": 14010 }, { "epoch": 10.18, "grad_norm": 15.196161270141602, "learning_rate": 2.727346082465908e-05, "loss": 0.0231, "step": 14020 }, { "epoch": 10.19, "grad_norm": 19.31382179260254, "learning_rate": 2.7253288146534334e-05, "loss": 0.0157, "step": 14030 }, { "epoch": 10.19, "grad_norm": 0.8836584091186523, "learning_rate": 2.7233115468409585e-05, "loss": 0.0127, "step": 14040 }, { "epoch": 10.2, "grad_norm": 0.014301744289696217, "learning_rate": 2.721294279028484e-05, "loss": 0.0204, "step": 14050 }, { "epoch": 10.21, "grad_norm": 0.010419386439025402, "learning_rate": 2.7192770112160094e-05, "loss": 0.0026, "step": 14060 }, { "epoch": 10.21, "grad_norm": 0.0014298513997346163, "learning_rate": 2.7172597434035345e-05, "loss": 0.0048, "step": 14070 }, { "epoch": 10.22, "grad_norm": 0.0004111882590223104, "learning_rate": 2.7152424755910593e-05, "loss": 0.0045, "step": 14080 }, { "epoch": 10.23, "grad_norm": 9.619214057922363, "learning_rate": 2.7132252077785847e-05, "loss": 0.0135, "step": 14090 }, { "epoch": 10.24, "grad_norm": 0.004899363964796066, "learning_rate": 2.71120793996611e-05, "loss": 0.0243, "step": 14100 }, { "epoch": 10.24, "grad_norm": 0.05989596247673035, "learning_rate": 2.7091906721536353e-05, "loss": 0.0049, "step": 14110 }, { "epoch": 10.25, "grad_norm": 0.08896303921937943, "learning_rate": 2.7071734043411607e-05, "loss": 0.0167, "step": 14120 }, { "epoch": 10.26, "grad_norm": 0.0220908485352993, "learning_rate": 2.705156136528686e-05, "loss": 0.0252, "step": 14130 }, { "epoch": 10.26, "grad_norm": 0.42359215021133423, "learning_rate": 2.7031388687162106e-05, "loss": 0.0049, "step": 14140 }, { "epoch": 10.27, "grad_norm": 0.0026175114326179028, "learning_rate": 2.701121600903736e-05, "loss": 0.0151, "step": 14150 }, { "epoch": 10.28, "grad_norm": 0.0033834856003522873, "learning_rate": 2.6991043330912612e-05, "loss": 0.0054, "step": 14160 }, { "epoch": 10.29, "grad_norm": 13.682700157165527, "learning_rate": 2.6970870652787866e-05, "loss": 0.0343, "step": 14170 }, { "epoch": 10.29, "grad_norm": 10.103180885314941, "learning_rate": 2.695069797466312e-05, "loss": 0.0007, "step": 14180 }, { "epoch": 10.3, "grad_norm": 0.08334420621395111, "learning_rate": 2.693052529653837e-05, "loss": 0.0252, "step": 14190 }, { "epoch": 10.31, "grad_norm": 0.08771252632141113, "learning_rate": 2.691035261841362e-05, "loss": 0.0061, "step": 14200 }, { "epoch": 10.32, "grad_norm": 0.025557437911629677, "learning_rate": 2.6890179940288874e-05, "loss": 0.0153, "step": 14210 }, { "epoch": 10.32, "grad_norm": 0.06524745374917984, "learning_rate": 2.6870007262164125e-05, "loss": 0.0018, "step": 14220 }, { "epoch": 10.33, "grad_norm": 0.2285977602005005, "learning_rate": 2.684983458403938e-05, "loss": 0.0129, "step": 14230 }, { "epoch": 10.34, "grad_norm": 0.004466556012630463, "learning_rate": 2.6829661905914634e-05, "loss": 0.0095, "step": 14240 }, { "epoch": 10.34, "grad_norm": 0.238087996840477, "learning_rate": 2.680948922778988e-05, "loss": 0.0022, "step": 14250 }, { "epoch": 10.35, "grad_norm": 0.004598718602210283, "learning_rate": 2.6789316549665133e-05, "loss": 0.009, "step": 14260 }, { "epoch": 10.36, "grad_norm": 0.0024979293812066317, "learning_rate": 2.6769143871540387e-05, "loss": 0.0039, "step": 14270 }, { "epoch": 10.37, "grad_norm": 0.17606070637702942, "learning_rate": 2.6748971193415638e-05, "loss": 0.0064, "step": 14280 }, { "epoch": 10.37, "grad_norm": 0.0038365270011126995, "learning_rate": 2.6728798515290893e-05, "loss": 0.017, "step": 14290 }, { "epoch": 10.38, "grad_norm": 0.42660027742385864, "learning_rate": 2.6708625837166147e-05, "loss": 0.002, "step": 14300 }, { "epoch": 10.39, "grad_norm": 0.0034667763393372297, "learning_rate": 2.6688453159041395e-05, "loss": 0.0142, "step": 14310 }, { "epoch": 10.4, "grad_norm": 0.26163700222969055, "learning_rate": 2.6668280480916646e-05, "loss": 0.0053, "step": 14320 }, { "epoch": 10.4, "grad_norm": 0.7694606184959412, "learning_rate": 2.66481078027919e-05, "loss": 0.0079, "step": 14330 }, { "epoch": 10.41, "grad_norm": 0.7763135433197021, "learning_rate": 2.662793512466715e-05, "loss": 0.0041, "step": 14340 }, { "epoch": 10.42, "grad_norm": 35.8295783996582, "learning_rate": 2.6607762446542406e-05, "loss": 0.0264, "step": 14350 }, { "epoch": 10.42, "grad_norm": 5.697082042694092, "learning_rate": 2.658758976841766e-05, "loss": 0.0429, "step": 14360 }, { "epoch": 10.43, "grad_norm": 1.5952166318893433, "learning_rate": 2.6567417090292908e-05, "loss": 0.0072, "step": 14370 }, { "epoch": 10.44, "grad_norm": 0.30308908224105835, "learning_rate": 2.654724441216816e-05, "loss": 0.0024, "step": 14380 }, { "epoch": 10.45, "grad_norm": 22.062713623046875, "learning_rate": 2.6527071734043414e-05, "loss": 0.0301, "step": 14390 }, { "epoch": 10.45, "grad_norm": 1.5792371034622192, "learning_rate": 2.6506899055918665e-05, "loss": 0.0236, "step": 14400 }, { "epoch": 10.46, "grad_norm": 0.21174445748329163, "learning_rate": 2.648672637779392e-05, "loss": 0.005, "step": 14410 }, { "epoch": 10.47, "grad_norm": 0.003805481130257249, "learning_rate": 2.6466553699669174e-05, "loss": 0.009, "step": 14420 }, { "epoch": 10.48, "grad_norm": 0.05694010108709335, "learning_rate": 2.6446381021544418e-05, "loss": 0.0185, "step": 14430 }, { "epoch": 10.48, "grad_norm": 0.01830306462943554, "learning_rate": 2.6426208343419672e-05, "loss": 0.0011, "step": 14440 }, { "epoch": 10.49, "grad_norm": 1.060694932937622, "learning_rate": 2.6406035665294927e-05, "loss": 0.0131, "step": 14450 }, { "epoch": 10.5, "grad_norm": 0.5278339982032776, "learning_rate": 2.6385862987170178e-05, "loss": 0.0064, "step": 14460 }, { "epoch": 10.5, "grad_norm": 5.779145240783691, "learning_rate": 2.6365690309045432e-05, "loss": 0.0047, "step": 14470 }, { "epoch": 10.51, "grad_norm": 0.07150599360466003, "learning_rate": 2.634551763092068e-05, "loss": 0.0034, "step": 14480 }, { "epoch": 10.52, "grad_norm": 37.375770568847656, "learning_rate": 2.632534495279593e-05, "loss": 0.0171, "step": 14490 }, { "epoch": 10.53, "grad_norm": 0.21243281662464142, "learning_rate": 2.6305172274671186e-05, "loss": 0.0115, "step": 14500 }, { "epoch": 10.53, "grad_norm": 0.0017663768958300352, "learning_rate": 2.628499959654644e-05, "loss": 0.0041, "step": 14510 }, { "epoch": 10.54, "grad_norm": 0.026727020740509033, "learning_rate": 2.626482691842169e-05, "loss": 0.0026, "step": 14520 }, { "epoch": 10.55, "grad_norm": 0.15876881778240204, "learning_rate": 2.6244654240296946e-05, "loss": 0.0123, "step": 14530 }, { "epoch": 10.56, "grad_norm": 0.006429377943277359, "learning_rate": 2.6224481562172193e-05, "loss": 0.001, "step": 14540 }, { "epoch": 10.56, "grad_norm": 0.0438314713537693, "learning_rate": 2.6204308884047444e-05, "loss": 0.0197, "step": 14550 }, { "epoch": 10.57, "grad_norm": 0.153500035405159, "learning_rate": 2.61841362059227e-05, "loss": 0.0099, "step": 14560 }, { "epoch": 10.58, "grad_norm": 0.0012310659512877464, "learning_rate": 2.6163963527797953e-05, "loss": 0.0103, "step": 14570 }, { "epoch": 10.58, "grad_norm": 14.718790054321289, "learning_rate": 2.6143790849673204e-05, "loss": 0.0082, "step": 14580 }, { "epoch": 10.59, "grad_norm": 0.1377602070569992, "learning_rate": 2.612361817154846e-05, "loss": 0.0043, "step": 14590 }, { "epoch": 10.6, "grad_norm": 0.01837272197008133, "learning_rate": 2.6103445493423707e-05, "loss": 0.0215, "step": 14600 }, { "epoch": 10.61, "grad_norm": 0.0010429949034005404, "learning_rate": 2.6083272815298958e-05, "loss": 0.0145, "step": 14610 }, { "epoch": 10.61, "grad_norm": 0.00035799731267616153, "learning_rate": 2.6063100137174212e-05, "loss": 0.0063, "step": 14620 }, { "epoch": 10.62, "grad_norm": 0.17942433059215546, "learning_rate": 2.6042927459049467e-05, "loss": 0.0061, "step": 14630 }, { "epoch": 10.63, "grad_norm": 8.952868461608887, "learning_rate": 2.6022754780924718e-05, "loss": 0.0197, "step": 14640 }, { "epoch": 10.64, "grad_norm": 5.475463390350342, "learning_rate": 2.6002582102799972e-05, "loss": 0.0044, "step": 14650 }, { "epoch": 10.64, "grad_norm": 0.020051149651408195, "learning_rate": 2.598240942467522e-05, "loss": 0.0021, "step": 14660 }, { "epoch": 10.65, "grad_norm": 0.033862363547086716, "learning_rate": 2.596223674655047e-05, "loss": 0.0175, "step": 14670 }, { "epoch": 10.66, "grad_norm": 0.0405866913497448, "learning_rate": 2.5942064068425725e-05, "loss": 0.0044, "step": 14680 }, { "epoch": 10.66, "grad_norm": 1.4321554899215698, "learning_rate": 2.592189139030098e-05, "loss": 0.0085, "step": 14690 }, { "epoch": 10.67, "grad_norm": 1.3793431520462036, "learning_rate": 2.590171871217623e-05, "loss": 0.0105, "step": 14700 }, { "epoch": 10.68, "grad_norm": 0.22573919594287872, "learning_rate": 2.588154603405148e-05, "loss": 0.0136, "step": 14710 }, { "epoch": 10.69, "grad_norm": 0.015440167859196663, "learning_rate": 2.5861373355926733e-05, "loss": 0.0457, "step": 14720 }, { "epoch": 10.69, "grad_norm": 0.01662645861506462, "learning_rate": 2.5841200677801984e-05, "loss": 0.0057, "step": 14730 }, { "epoch": 10.7, "grad_norm": 0.18805713951587677, "learning_rate": 2.582102799967724e-05, "loss": 0.0251, "step": 14740 }, { "epoch": 10.71, "grad_norm": 3.959599256515503, "learning_rate": 2.5800855321552493e-05, "loss": 0.0078, "step": 14750 }, { "epoch": 10.72, "grad_norm": 0.013117094524204731, "learning_rate": 2.5780682643427744e-05, "loss": 0.0061, "step": 14760 }, { "epoch": 10.72, "grad_norm": 0.08931567519903183, "learning_rate": 2.5760509965302992e-05, "loss": 0.0076, "step": 14770 }, { "epoch": 10.73, "grad_norm": 3.2667019367218018, "learning_rate": 2.5740337287178246e-05, "loss": 0.0039, "step": 14780 }, { "epoch": 10.74, "grad_norm": 0.13932375609874725, "learning_rate": 2.5720164609053497e-05, "loss": 0.0079, "step": 14790 }, { "epoch": 10.74, "grad_norm": 1.5266163349151611, "learning_rate": 2.5699991930928752e-05, "loss": 0.0041, "step": 14800 }, { "epoch": 10.75, "grad_norm": 10.672917366027832, "learning_rate": 2.5679819252804006e-05, "loss": 0.0095, "step": 14810 }, { "epoch": 10.76, "grad_norm": 0.008519817143678665, "learning_rate": 2.5659646574679257e-05, "loss": 0.0158, "step": 14820 }, { "epoch": 10.77, "grad_norm": 0.009736945852637291, "learning_rate": 2.5639473896554505e-05, "loss": 0.0024, "step": 14830 }, { "epoch": 10.77, "grad_norm": 0.02101651206612587, "learning_rate": 2.561930121842976e-05, "loss": 0.0281, "step": 14840 }, { "epoch": 10.78, "grad_norm": 0.009922484867274761, "learning_rate": 2.559912854030501e-05, "loss": 0.0306, "step": 14850 }, { "epoch": 10.79, "grad_norm": 1.5312800407409668, "learning_rate": 2.5578955862180265e-05, "loss": 0.0055, "step": 14860 }, { "epoch": 10.79, "grad_norm": 0.24132943153381348, "learning_rate": 2.555878318405552e-05, "loss": 0.0061, "step": 14870 }, { "epoch": 10.8, "grad_norm": 2.395301342010498, "learning_rate": 2.553861050593077e-05, "loss": 0.0133, "step": 14880 }, { "epoch": 10.81, "grad_norm": 0.15985184907913208, "learning_rate": 2.551843782780602e-05, "loss": 0.0197, "step": 14890 }, { "epoch": 10.82, "grad_norm": 1.5994820594787598, "learning_rate": 2.5498265149681273e-05, "loss": 0.0073, "step": 14900 }, { "epoch": 10.82, "grad_norm": 0.009897888638079166, "learning_rate": 2.5478092471556524e-05, "loss": 0.0058, "step": 14910 }, { "epoch": 10.83, "grad_norm": 0.013136355206370354, "learning_rate": 2.545791979343178e-05, "loss": 0.005, "step": 14920 }, { "epoch": 10.84, "grad_norm": 0.7152259945869446, "learning_rate": 2.5437747115307033e-05, "loss": 0.0131, "step": 14930 }, { "epoch": 10.85, "grad_norm": 0.002481738803908229, "learning_rate": 2.5417574437182277e-05, "loss": 0.0053, "step": 14940 }, { "epoch": 10.85, "grad_norm": 0.5290198922157288, "learning_rate": 2.539740175905753e-05, "loss": 0.0109, "step": 14950 }, { "epoch": 10.86, "grad_norm": 0.23663221299648285, "learning_rate": 2.5377229080932786e-05, "loss": 0.0159, "step": 14960 }, { "epoch": 10.87, "grad_norm": 0.03912312537431717, "learning_rate": 2.5357056402808037e-05, "loss": 0.0078, "step": 14970 }, { "epoch": 10.87, "grad_norm": 0.0030671055428683758, "learning_rate": 2.533688372468329e-05, "loss": 0.0033, "step": 14980 }, { "epoch": 10.88, "grad_norm": 3.3906474113464355, "learning_rate": 2.5316711046558546e-05, "loss": 0.0024, "step": 14990 }, { "epoch": 10.89, "grad_norm": 0.002407472115010023, "learning_rate": 2.529653836843379e-05, "loss": 0.0038, "step": 15000 }, { "epoch": 10.9, "grad_norm": 0.9811303615570068, "learning_rate": 2.5276365690309045e-05, "loss": 0.0272, "step": 15010 }, { "epoch": 10.9, "grad_norm": 0.217171773314476, "learning_rate": 2.52561930121843e-05, "loss": 0.013, "step": 15020 }, { "epoch": 10.91, "grad_norm": 0.01452325563877821, "learning_rate": 2.523602033405955e-05, "loss": 0.0065, "step": 15030 }, { "epoch": 10.92, "grad_norm": 0.003643118543550372, "learning_rate": 2.5215847655934805e-05, "loss": 0.0054, "step": 15040 }, { "epoch": 10.93, "grad_norm": 0.1353772133588791, "learning_rate": 2.5195674977810056e-05, "loss": 0.0043, "step": 15050 }, { "epoch": 10.93, "grad_norm": 0.01406155712902546, "learning_rate": 2.5175502299685304e-05, "loss": 0.0071, "step": 15060 }, { "epoch": 10.94, "grad_norm": 0.0014850402949377894, "learning_rate": 2.5155329621560558e-05, "loss": 0.0029, "step": 15070 }, { "epoch": 10.95, "grad_norm": 0.0004027250688523054, "learning_rate": 2.5135156943435813e-05, "loss": 0.0017, "step": 15080 }, { "epoch": 10.95, "grad_norm": 0.059271667152643204, "learning_rate": 2.5114984265311064e-05, "loss": 0.0041, "step": 15090 }, { "epoch": 10.96, "grad_norm": 1.4982573986053467, "learning_rate": 2.5094811587186318e-05, "loss": 0.0103, "step": 15100 }, { "epoch": 10.97, "grad_norm": 11.631914138793945, "learning_rate": 2.507463890906157e-05, "loss": 0.0164, "step": 15110 }, { "epoch": 10.98, "grad_norm": 0.01383855938911438, "learning_rate": 2.5054466230936817e-05, "loss": 0.0088, "step": 15120 }, { "epoch": 10.98, "grad_norm": 0.0035403859801590443, "learning_rate": 2.503429355281207e-05, "loss": 0.0115, "step": 15130 }, { "epoch": 10.99, "grad_norm": 0.2655543088912964, "learning_rate": 2.5014120874687326e-05, "loss": 0.0128, "step": 15140 }, { "epoch": 11.0, "grad_norm": 1.0399569272994995, "learning_rate": 2.4993948196562577e-05, "loss": 0.0078, "step": 15150 }, { "epoch": 11.0, "eval_accuracy": 0.9981508372282979, "eval_f1": 0.995729966206481, "eval_loss": 0.004333225544542074, "eval_precision": 0.9993689856444234, "eval_recall": 0.9921173522656087, "eval_roc_auc": 0.9999826230983933, "eval_runtime": 388.635, "eval_samples_per_second": 226.814, "eval_steps_per_second": 14.178, "step": 15152 }, { "epoch": 11.01, "grad_norm": 0.0071834782138466835, "learning_rate": 2.4973775518437828e-05, "loss": 0.0167, "step": 15160 }, { "epoch": 11.01, "grad_norm": 0.25943008065223694, "learning_rate": 2.4953602840313082e-05, "loss": 0.0008, "step": 15170 }, { "epoch": 11.02, "grad_norm": 0.2013348937034607, "learning_rate": 2.4933430162188333e-05, "loss": 0.0068, "step": 15180 }, { "epoch": 11.03, "grad_norm": 3.2597954273223877, "learning_rate": 2.4913257484063585e-05, "loss": 0.018, "step": 15190 }, { "epoch": 11.03, "grad_norm": 0.036766473203897476, "learning_rate": 2.489308480593884e-05, "loss": 0.002, "step": 15200 }, { "epoch": 11.04, "grad_norm": 0.5187474489212036, "learning_rate": 2.487291212781409e-05, "loss": 0.0173, "step": 15210 }, { "epoch": 11.05, "grad_norm": 0.0028880988247692585, "learning_rate": 2.485273944968934e-05, "loss": 0.0036, "step": 15220 }, { "epoch": 11.06, "grad_norm": 0.13685983419418335, "learning_rate": 2.4832566771564596e-05, "loss": 0.0083, "step": 15230 }, { "epoch": 11.06, "grad_norm": 0.02378654293715954, "learning_rate": 2.4812394093439847e-05, "loss": 0.0243, "step": 15240 }, { "epoch": 11.07, "grad_norm": 0.009897316806018353, "learning_rate": 2.4792221415315098e-05, "loss": 0.0062, "step": 15250 }, { "epoch": 11.08, "grad_norm": 0.005875424947589636, "learning_rate": 2.4772048737190352e-05, "loss": 0.0028, "step": 15260 }, { "epoch": 11.09, "grad_norm": 0.008453444577753544, "learning_rate": 2.4751876059065603e-05, "loss": 0.0019, "step": 15270 }, { "epoch": 11.09, "grad_norm": 1.0412099361419678, "learning_rate": 2.4731703380940854e-05, "loss": 0.0115, "step": 15280 }, { "epoch": 11.1, "grad_norm": 0.19635595381259918, "learning_rate": 2.471153070281611e-05, "loss": 0.0041, "step": 15290 }, { "epoch": 11.11, "grad_norm": 0.1889576017856598, "learning_rate": 2.4691358024691357e-05, "loss": 0.0023, "step": 15300 }, { "epoch": 11.11, "grad_norm": 0.0010881648631766438, "learning_rate": 2.467118534656661e-05, "loss": 0.0103, "step": 15310 }, { "epoch": 11.12, "grad_norm": 0.00150064448826015, "learning_rate": 2.4651012668441866e-05, "loss": 0.0192, "step": 15320 }, { "epoch": 11.13, "grad_norm": 0.2817605435848236, "learning_rate": 2.4630839990317113e-05, "loss": 0.0132, "step": 15330 }, { "epoch": 11.14, "grad_norm": 5.953119277954102, "learning_rate": 2.4610667312192368e-05, "loss": 0.0051, "step": 15340 }, { "epoch": 11.14, "grad_norm": 0.0015101551543921232, "learning_rate": 2.4590494634067622e-05, "loss": 0.0095, "step": 15350 }, { "epoch": 11.15, "grad_norm": 0.10892230272293091, "learning_rate": 2.457032195594287e-05, "loss": 0.0132, "step": 15360 }, { "epoch": 11.16, "grad_norm": 0.0027881257701665163, "learning_rate": 2.4550149277818124e-05, "loss": 0.002, "step": 15370 }, { "epoch": 11.17, "grad_norm": 0.05755399912595749, "learning_rate": 2.452997659969338e-05, "loss": 0.0006, "step": 15380 }, { "epoch": 11.17, "grad_norm": 0.019885243847966194, "learning_rate": 2.4509803921568626e-05, "loss": 0.0041, "step": 15390 }, { "epoch": 11.18, "grad_norm": 1.7892396450042725, "learning_rate": 2.448963124344388e-05, "loss": 0.0077, "step": 15400 }, { "epoch": 11.19, "grad_norm": 0.0175933800637722, "learning_rate": 2.4469458565319135e-05, "loss": 0.0047, "step": 15410 }, { "epoch": 11.19, "grad_norm": 0.6241514086723328, "learning_rate": 2.4449285887194383e-05, "loss": 0.0067, "step": 15420 }, { "epoch": 11.2, "grad_norm": 0.009889623150229454, "learning_rate": 2.4429113209069638e-05, "loss": 0.0122, "step": 15430 }, { "epoch": 11.21, "grad_norm": 0.005964280106127262, "learning_rate": 2.4408940530944892e-05, "loss": 0.0056, "step": 15440 }, { "epoch": 11.22, "grad_norm": 0.0019957488402724266, "learning_rate": 2.438876785282014e-05, "loss": 0.0011, "step": 15450 }, { "epoch": 11.22, "grad_norm": 0.09828945994377136, "learning_rate": 2.4368595174695394e-05, "loss": 0.0047, "step": 15460 }, { "epoch": 11.23, "grad_norm": 0.011916988529264927, "learning_rate": 2.434842249657065e-05, "loss": 0.005, "step": 15470 }, { "epoch": 11.24, "grad_norm": 21.119287490844727, "learning_rate": 2.4328249818445896e-05, "loss": 0.0142, "step": 15480 }, { "epoch": 11.25, "grad_norm": 0.0025653273332864046, "learning_rate": 2.430807714032115e-05, "loss": 0.0039, "step": 15490 }, { "epoch": 11.25, "grad_norm": 2.179283380508423, "learning_rate": 2.4287904462196405e-05, "loss": 0.0019, "step": 15500 }, { "epoch": 11.26, "grad_norm": 0.041954535990953445, "learning_rate": 2.4267731784071653e-05, "loss": 0.0018, "step": 15510 }, { "epoch": 11.27, "grad_norm": 0.13100317120552063, "learning_rate": 2.4247559105946907e-05, "loss": 0.0206, "step": 15520 }, { "epoch": 11.27, "grad_norm": 0.11618025600910187, "learning_rate": 2.4227386427822162e-05, "loss": 0.0009, "step": 15530 }, { "epoch": 11.28, "grad_norm": 0.12495038658380508, "learning_rate": 2.420721374969741e-05, "loss": 0.0018, "step": 15540 }, { "epoch": 11.29, "grad_norm": 0.1562829166650772, "learning_rate": 2.4187041071572664e-05, "loss": 0.0415, "step": 15550 }, { "epoch": 11.3, "grad_norm": 0.40936851501464844, "learning_rate": 2.4166868393447915e-05, "loss": 0.0005, "step": 15560 }, { "epoch": 11.3, "grad_norm": 0.7347458004951477, "learning_rate": 2.4146695715323166e-05, "loss": 0.0011, "step": 15570 }, { "epoch": 11.31, "grad_norm": 0.0022218599915504456, "learning_rate": 2.412652303719842e-05, "loss": 0.0045, "step": 15580 }, { "epoch": 11.32, "grad_norm": 0.26786237955093384, "learning_rate": 2.4106350359073672e-05, "loss": 0.0198, "step": 15590 }, { "epoch": 11.32, "grad_norm": 0.002820109250023961, "learning_rate": 2.4086177680948923e-05, "loss": 0.0037, "step": 15600 }, { "epoch": 11.33, "grad_norm": 0.006033481098711491, "learning_rate": 2.4066005002824177e-05, "loss": 0.0024, "step": 15610 }, { "epoch": 11.34, "grad_norm": 0.0011767403921112418, "learning_rate": 2.404583232469943e-05, "loss": 0.0033, "step": 15620 }, { "epoch": 11.35, "grad_norm": 26.068288803100586, "learning_rate": 2.402565964657468e-05, "loss": 0.0076, "step": 15630 }, { "epoch": 11.35, "grad_norm": 0.006819643080234528, "learning_rate": 2.4005486968449934e-05, "loss": 0.0009, "step": 15640 }, { "epoch": 11.36, "grad_norm": 0.0006057365681044757, "learning_rate": 2.3985314290325185e-05, "loss": 0.0, "step": 15650 }, { "epoch": 11.37, "grad_norm": 31.54865264892578, "learning_rate": 2.3965141612200436e-05, "loss": 0.015, "step": 15660 }, { "epoch": 11.38, "grad_norm": 6.902843952178955, "learning_rate": 2.394496893407569e-05, "loss": 0.009, "step": 15670 }, { "epoch": 11.38, "grad_norm": 0.00346160470508039, "learning_rate": 2.392479625595094e-05, "loss": 0.0145, "step": 15680 }, { "epoch": 11.39, "grad_norm": 0.0008461098768748343, "learning_rate": 2.3904623577826193e-05, "loss": 0.0021, "step": 15690 }, { "epoch": 11.4, "grad_norm": 8.534778594970703, "learning_rate": 2.3884450899701447e-05, "loss": 0.0039, "step": 15700 }, { "epoch": 11.4, "grad_norm": 30.421672821044922, "learning_rate": 2.3864278221576698e-05, "loss": 0.0148, "step": 15710 }, { "epoch": 11.41, "grad_norm": 0.011093859560787678, "learning_rate": 2.384410554345195e-05, "loss": 0.0017, "step": 15720 }, { "epoch": 11.42, "grad_norm": 0.0029349441174417734, "learning_rate": 2.3823932865327204e-05, "loss": 0.0215, "step": 15730 }, { "epoch": 11.43, "grad_norm": 7.691098690032959, "learning_rate": 2.3803760187202455e-05, "loss": 0.0188, "step": 15740 }, { "epoch": 11.43, "grad_norm": 0.11859267950057983, "learning_rate": 2.3783587509077706e-05, "loss": 0.0291, "step": 15750 }, { "epoch": 11.44, "grad_norm": 0.039303623139858246, "learning_rate": 2.376341483095296e-05, "loss": 0.0058, "step": 15760 }, { "epoch": 11.45, "grad_norm": 0.08446269482374191, "learning_rate": 2.374324215282821e-05, "loss": 0.0104, "step": 15770 }, { "epoch": 11.46, "grad_norm": 0.0554688386619091, "learning_rate": 2.3723069474703462e-05, "loss": 0.0039, "step": 15780 }, { "epoch": 11.46, "grad_norm": 0.057702578604221344, "learning_rate": 2.3702896796578717e-05, "loss": 0.004, "step": 15790 }, { "epoch": 11.47, "grad_norm": 0.04934044927358627, "learning_rate": 2.3682724118453968e-05, "loss": 0.0078, "step": 15800 }, { "epoch": 11.48, "grad_norm": 0.025431061163544655, "learning_rate": 2.366255144032922e-05, "loss": 0.0396, "step": 15810 }, { "epoch": 11.48, "grad_norm": 0.8368417620658875, "learning_rate": 2.364237876220447e-05, "loss": 0.0046, "step": 15820 }, { "epoch": 11.49, "grad_norm": 0.12073979526758194, "learning_rate": 2.3622206084079725e-05, "loss": 0.0058, "step": 15830 }, { "epoch": 11.5, "grad_norm": 0.2134593278169632, "learning_rate": 2.3602033405954976e-05, "loss": 0.0023, "step": 15840 }, { "epoch": 11.51, "grad_norm": 0.0010249282931908965, "learning_rate": 2.3581860727830227e-05, "loss": 0.0034, "step": 15850 }, { "epoch": 11.51, "grad_norm": 7.7366180419921875, "learning_rate": 2.356168804970548e-05, "loss": 0.0234, "step": 15860 }, { "epoch": 11.52, "grad_norm": 0.0021505900658667088, "learning_rate": 2.3541515371580732e-05, "loss": 0.0072, "step": 15870 }, { "epoch": 11.53, "grad_norm": 0.06317915767431259, "learning_rate": 2.3521342693455983e-05, "loss": 0.0089, "step": 15880 }, { "epoch": 11.54, "grad_norm": 0.20523525774478912, "learning_rate": 2.3501170015331238e-05, "loss": 0.0095, "step": 15890 }, { "epoch": 11.54, "grad_norm": 0.0021271202713251114, "learning_rate": 2.348099733720649e-05, "loss": 0.0048, "step": 15900 }, { "epoch": 11.55, "grad_norm": 0.35318243503570557, "learning_rate": 2.346082465908174e-05, "loss": 0.0221, "step": 15910 }, { "epoch": 11.56, "grad_norm": 0.0006868810160085559, "learning_rate": 2.3440651980956995e-05, "loss": 0.0143, "step": 15920 }, { "epoch": 11.56, "grad_norm": 8.075051307678223, "learning_rate": 2.3420479302832246e-05, "loss": 0.0014, "step": 15930 }, { "epoch": 11.57, "grad_norm": 0.32641565799713135, "learning_rate": 2.3400306624707497e-05, "loss": 0.0079, "step": 15940 }, { "epoch": 11.58, "grad_norm": 0.015668796375393867, "learning_rate": 2.338013394658275e-05, "loss": 0.0074, "step": 15950 }, { "epoch": 11.59, "grad_norm": 0.0006520305760204792, "learning_rate": 2.3359961268458002e-05, "loss": 0.0031, "step": 15960 }, { "epoch": 11.59, "grad_norm": 0.003270229557529092, "learning_rate": 2.3339788590333253e-05, "loss": 0.0027, "step": 15970 }, { "epoch": 11.6, "grad_norm": 0.07816585153341293, "learning_rate": 2.3319615912208508e-05, "loss": 0.0081, "step": 15980 }, { "epoch": 11.61, "grad_norm": 0.0006159089971333742, "learning_rate": 2.329944323408376e-05, "loss": 0.0089, "step": 15990 }, { "epoch": 11.62, "grad_norm": 10.090989112854004, "learning_rate": 2.327927055595901e-05, "loss": 0.0036, "step": 16000 }, { "epoch": 11.62, "grad_norm": 0.0013605119893327355, "learning_rate": 2.3259097877834264e-05, "loss": 0.0034, "step": 16010 }, { "epoch": 11.63, "grad_norm": 0.001587074133567512, "learning_rate": 2.3238925199709515e-05, "loss": 0.0173, "step": 16020 }, { "epoch": 11.64, "grad_norm": 0.042119793593883514, "learning_rate": 2.3218752521584767e-05, "loss": 0.0054, "step": 16030 }, { "epoch": 11.64, "grad_norm": 0.0023439363576471806, "learning_rate": 2.319857984346002e-05, "loss": 0.0122, "step": 16040 }, { "epoch": 11.65, "grad_norm": 0.0015862892614677548, "learning_rate": 2.317840716533527e-05, "loss": 0.0055, "step": 16050 }, { "epoch": 11.66, "grad_norm": 0.04544699564576149, "learning_rate": 2.3158234487210523e-05, "loss": 0.0105, "step": 16060 }, { "epoch": 11.67, "grad_norm": 0.005376008804887533, "learning_rate": 2.3138061809085774e-05, "loss": 0.0, "step": 16070 }, { "epoch": 11.67, "grad_norm": 0.6874623894691467, "learning_rate": 2.3117889130961025e-05, "loss": 0.0042, "step": 16080 }, { "epoch": 11.68, "grad_norm": 0.23841716349124908, "learning_rate": 2.309771645283628e-05, "loss": 0.0028, "step": 16090 }, { "epoch": 11.69, "grad_norm": 0.046092964708805084, "learning_rate": 2.307754377471153e-05, "loss": 0.001, "step": 16100 }, { "epoch": 11.7, "grad_norm": 0.0021304069086909294, "learning_rate": 2.3057371096586782e-05, "loss": 0.0005, "step": 16110 }, { "epoch": 11.7, "grad_norm": 7.387868404388428, "learning_rate": 2.3037198418462036e-05, "loss": 0.0038, "step": 16120 }, { "epoch": 11.71, "grad_norm": 0.0003391271748114377, "learning_rate": 2.3017025740337287e-05, "loss": 0.001, "step": 16130 }, { "epoch": 11.72, "grad_norm": 0.28313395380973816, "learning_rate": 2.299685306221254e-05, "loss": 0.007, "step": 16140 }, { "epoch": 11.72, "grad_norm": 0.04677537456154823, "learning_rate": 2.2976680384087793e-05, "loss": 0.0011, "step": 16150 }, { "epoch": 11.73, "grad_norm": 0.15080393850803375, "learning_rate": 2.2956507705963044e-05, "loss": 0.0068, "step": 16160 }, { "epoch": 11.74, "grad_norm": 0.0016375051345676184, "learning_rate": 2.2936335027838295e-05, "loss": 0.0045, "step": 16170 }, { "epoch": 11.75, "grad_norm": 0.6318696141242981, "learning_rate": 2.291616234971355e-05, "loss": 0.015, "step": 16180 }, { "epoch": 11.75, "grad_norm": 0.010398590005934238, "learning_rate": 2.28959896715888e-05, "loss": 0.0044, "step": 16190 }, { "epoch": 11.76, "grad_norm": 6.571838855743408, "learning_rate": 2.2875816993464052e-05, "loss": 0.0289, "step": 16200 }, { "epoch": 11.77, "grad_norm": 0.23911860585212708, "learning_rate": 2.2855644315339306e-05, "loss": 0.0108, "step": 16210 }, { "epoch": 11.77, "grad_norm": 0.0554070845246315, "learning_rate": 2.2835471637214557e-05, "loss": 0.0159, "step": 16220 }, { "epoch": 11.78, "grad_norm": 0.001894534332677722, "learning_rate": 2.281529895908981e-05, "loss": 0.0006, "step": 16230 }, { "epoch": 11.79, "grad_norm": 0.11737006157636642, "learning_rate": 2.2795126280965063e-05, "loss": 0.003, "step": 16240 }, { "epoch": 11.8, "grad_norm": 0.0013864507200196385, "learning_rate": 2.2774953602840314e-05, "loss": 0.0022, "step": 16250 }, { "epoch": 11.8, "grad_norm": 0.004860733635723591, "learning_rate": 2.2754780924715565e-05, "loss": 0.0017, "step": 16260 }, { "epoch": 11.81, "grad_norm": 0.000366124149877578, "learning_rate": 2.273460824659082e-05, "loss": 0.0073, "step": 16270 }, { "epoch": 11.82, "grad_norm": 0.09364797174930573, "learning_rate": 2.271443556846607e-05, "loss": 0.0087, "step": 16280 }, { "epoch": 11.83, "grad_norm": 0.02469642087817192, "learning_rate": 2.269426289034132e-05, "loss": 0.0099, "step": 16290 }, { "epoch": 11.83, "grad_norm": 5.18701696395874, "learning_rate": 2.2674090212216576e-05, "loss": 0.0165, "step": 16300 }, { "epoch": 11.84, "grad_norm": 0.10657285153865814, "learning_rate": 2.2653917534091827e-05, "loss": 0.0028, "step": 16310 }, { "epoch": 11.85, "grad_norm": 0.03622545674443245, "learning_rate": 2.2633744855967078e-05, "loss": 0.0017, "step": 16320 }, { "epoch": 11.85, "grad_norm": 0.005775143392384052, "learning_rate": 2.2613572177842333e-05, "loss": 0.0023, "step": 16330 }, { "epoch": 11.86, "grad_norm": 0.07602769881486893, "learning_rate": 2.2593399499717584e-05, "loss": 0.0167, "step": 16340 }, { "epoch": 11.87, "grad_norm": 0.043435435742139816, "learning_rate": 2.2573226821592835e-05, "loss": 0.0031, "step": 16350 }, { "epoch": 11.88, "grad_norm": 0.002995203249156475, "learning_rate": 2.255305414346809e-05, "loss": 0.0255, "step": 16360 }, { "epoch": 11.88, "grad_norm": 0.024543453007936478, "learning_rate": 2.253288146534334e-05, "loss": 0.0191, "step": 16370 }, { "epoch": 11.89, "grad_norm": 0.10300853848457336, "learning_rate": 2.251270878721859e-05, "loss": 0.0036, "step": 16380 }, { "epoch": 11.9, "grad_norm": 0.009681067429482937, "learning_rate": 2.2492536109093846e-05, "loss": 0.0027, "step": 16390 }, { "epoch": 11.91, "grad_norm": 0.030278483405709267, "learning_rate": 2.2472363430969097e-05, "loss": 0.0013, "step": 16400 }, { "epoch": 11.91, "grad_norm": 0.1338738054037094, "learning_rate": 2.2452190752844348e-05, "loss": 0.0014, "step": 16410 }, { "epoch": 11.92, "grad_norm": 0.1927368938922882, "learning_rate": 2.2432018074719603e-05, "loss": 0.0078, "step": 16420 }, { "epoch": 11.93, "grad_norm": 0.06984108686447144, "learning_rate": 2.2411845396594854e-05, "loss": 0.0078, "step": 16430 }, { "epoch": 11.93, "grad_norm": 0.19020429253578186, "learning_rate": 2.2391672718470105e-05, "loss": 0.004, "step": 16440 }, { "epoch": 11.94, "grad_norm": 0.23069943487644196, "learning_rate": 2.237150004034536e-05, "loss": 0.0034, "step": 16450 }, { "epoch": 11.95, "grad_norm": 1.717404842376709, "learning_rate": 2.235132736222061e-05, "loss": 0.0058, "step": 16460 }, { "epoch": 11.96, "grad_norm": 13.391761779785156, "learning_rate": 2.233115468409586e-05, "loss": 0.0138, "step": 16470 }, { "epoch": 11.96, "grad_norm": 0.0007910021813586354, "learning_rate": 2.2310982005971116e-05, "loss": 0.0015, "step": 16480 }, { "epoch": 11.97, "grad_norm": 0.0008350893040187657, "learning_rate": 2.2290809327846367e-05, "loss": 0.0077, "step": 16490 }, { "epoch": 11.98, "grad_norm": 0.005978062283247709, "learning_rate": 2.2270636649721618e-05, "loss": 0.009, "step": 16500 }, { "epoch": 11.99, "grad_norm": 0.059390950947999954, "learning_rate": 2.2250463971596872e-05, "loss": 0.0029, "step": 16510 }, { "epoch": 11.99, "grad_norm": 0.384694904088974, "learning_rate": 2.2230291293472124e-05, "loss": 0.0059, "step": 16520 }, { "epoch": 12.0, "grad_norm": 9.535301208496094, "learning_rate": 2.2210118615347375e-05, "loss": 0.0142, "step": 16530 }, { "epoch": 12.0, "eval_accuracy": 0.9982189045695875, "eval_f1": 0.9958938145678043, "eval_loss": 0.003976278472691774, "eval_precision": 0.997903454059437, "eval_recall": 0.993892253079975, "eval_roc_auc": 0.9999848726260968, "eval_runtime": 386.255, "eval_samples_per_second": 228.212, "eval_steps_per_second": 14.265, "step": 16530 }, { "epoch": 12.01, "grad_norm": 0.000495292479172349, "learning_rate": 2.2189945937222626e-05, "loss": 0.0122, "step": 16540 }, { "epoch": 12.01, "grad_norm": 0.10421086847782135, "learning_rate": 2.216977325909788e-05, "loss": 0.0062, "step": 16550 }, { "epoch": 12.02, "grad_norm": 0.003116333158686757, "learning_rate": 2.214960058097313e-05, "loss": 0.0042, "step": 16560 }, { "epoch": 12.03, "grad_norm": 0.049882255494594574, "learning_rate": 2.2129427902848382e-05, "loss": 0.0033, "step": 16570 }, { "epoch": 12.04, "grad_norm": 0.006924469955265522, "learning_rate": 2.2109255224723637e-05, "loss": 0.0145, "step": 16580 }, { "epoch": 12.04, "grad_norm": 1.2397273778915405, "learning_rate": 2.2089082546598888e-05, "loss": 0.0192, "step": 16590 }, { "epoch": 12.05, "grad_norm": 0.001038398826494813, "learning_rate": 2.206890986847414e-05, "loss": 0.0038, "step": 16600 }, { "epoch": 12.06, "grad_norm": 2.1774699687957764, "learning_rate": 2.204873719034939e-05, "loss": 0.0145, "step": 16610 }, { "epoch": 12.07, "grad_norm": 0.2245302051305771, "learning_rate": 2.2028564512224644e-05, "loss": 0.0072, "step": 16620 }, { "epoch": 12.07, "grad_norm": 0.0035848254337906837, "learning_rate": 2.2008391834099896e-05, "loss": 0.0074, "step": 16630 }, { "epoch": 12.08, "grad_norm": 0.0033830467145889997, "learning_rate": 2.1988219155975147e-05, "loss": 0.0019, "step": 16640 }, { "epoch": 12.09, "grad_norm": 0.0030913010705262423, "learning_rate": 2.19680464778504e-05, "loss": 0.007, "step": 16650 }, { "epoch": 12.09, "grad_norm": 11.367694854736328, "learning_rate": 2.1947873799725652e-05, "loss": 0.0155, "step": 16660 }, { "epoch": 12.1, "grad_norm": 0.0011047075968235731, "learning_rate": 2.1927701121600903e-05, "loss": 0.0087, "step": 16670 }, { "epoch": 12.11, "grad_norm": 0.018373142927885056, "learning_rate": 2.1907528443476158e-05, "loss": 0.0004, "step": 16680 }, { "epoch": 12.12, "grad_norm": 0.0015459812711924314, "learning_rate": 2.188735576535141e-05, "loss": 0.0121, "step": 16690 }, { "epoch": 12.12, "grad_norm": 0.09375672042369843, "learning_rate": 2.186718308722666e-05, "loss": 0.0195, "step": 16700 }, { "epoch": 12.13, "grad_norm": 0.009158155880868435, "learning_rate": 2.1847010409101914e-05, "loss": 0.0024, "step": 16710 }, { "epoch": 12.14, "grad_norm": 0.03295760229229927, "learning_rate": 2.1826837730977165e-05, "loss": 0.0081, "step": 16720 }, { "epoch": 12.15, "grad_norm": 0.030867867171764374, "learning_rate": 2.1806665052852416e-05, "loss": 0.0065, "step": 16730 }, { "epoch": 12.15, "grad_norm": 0.08418185263872147, "learning_rate": 2.178649237472767e-05, "loss": 0.0027, "step": 16740 }, { "epoch": 12.16, "grad_norm": 0.006285299547016621, "learning_rate": 2.1766319696602922e-05, "loss": 0.0081, "step": 16750 }, { "epoch": 12.17, "grad_norm": 0.0018992135301232338, "learning_rate": 2.1746147018478173e-05, "loss": 0.0171, "step": 16760 }, { "epoch": 12.17, "grad_norm": 0.3470768332481384, "learning_rate": 2.1725974340353428e-05, "loss": 0.0004, "step": 16770 }, { "epoch": 12.18, "grad_norm": 0.04968973249197006, "learning_rate": 2.170580166222868e-05, "loss": 0.0041, "step": 16780 }, { "epoch": 12.19, "grad_norm": 0.04065666347742081, "learning_rate": 2.168562898410393e-05, "loss": 0.0064, "step": 16790 }, { "epoch": 12.2, "grad_norm": 0.6985291838645935, "learning_rate": 2.166545630597918e-05, "loss": 0.0129, "step": 16800 }, { "epoch": 12.2, "grad_norm": 0.01881437934935093, "learning_rate": 2.1645283627854435e-05, "loss": 0.0119, "step": 16810 }, { "epoch": 12.21, "grad_norm": 0.04512902721762657, "learning_rate": 2.1625110949729686e-05, "loss": 0.0052, "step": 16820 }, { "epoch": 12.22, "grad_norm": 0.0031053638085722923, "learning_rate": 2.1604938271604937e-05, "loss": 0.0074, "step": 16830 }, { "epoch": 12.23, "grad_norm": 0.13527894020080566, "learning_rate": 2.1584765593480192e-05, "loss": 0.005, "step": 16840 }, { "epoch": 12.23, "grad_norm": 0.027386058121919632, "learning_rate": 2.1564592915355443e-05, "loss": 0.0018, "step": 16850 }, { "epoch": 12.24, "grad_norm": 0.002343888161703944, "learning_rate": 2.1544420237230694e-05, "loss": 0.0065, "step": 16860 }, { "epoch": 12.25, "grad_norm": 0.002938592340797186, "learning_rate": 2.152424755910595e-05, "loss": 0.0, "step": 16870 }, { "epoch": 12.25, "grad_norm": 0.0012315199710428715, "learning_rate": 2.15040748809812e-05, "loss": 0.0007, "step": 16880 }, { "epoch": 12.26, "grad_norm": 0.0013336024712771177, "learning_rate": 2.148390220285645e-05, "loss": 0.0021, "step": 16890 }, { "epoch": 12.27, "grad_norm": 0.11299686878919601, "learning_rate": 2.1463729524731705e-05, "loss": 0.01, "step": 16900 }, { "epoch": 12.28, "grad_norm": 20.187990188598633, "learning_rate": 2.1443556846606956e-05, "loss": 0.0147, "step": 16910 }, { "epoch": 12.28, "grad_norm": 0.3132416903972626, "learning_rate": 2.1423384168482207e-05, "loss": 0.0086, "step": 16920 }, { "epoch": 12.29, "grad_norm": 0.0373138003051281, "learning_rate": 2.1403211490357462e-05, "loss": 0.0147, "step": 16930 }, { "epoch": 12.3, "grad_norm": 0.0028076451271772385, "learning_rate": 2.1383038812232713e-05, "loss": 0.0013, "step": 16940 }, { "epoch": 12.3, "grad_norm": 0.2017146497964859, "learning_rate": 2.1362866134107964e-05, "loss": 0.0276, "step": 16950 }, { "epoch": 12.31, "grad_norm": 0.0041721658781170845, "learning_rate": 2.134269345598322e-05, "loss": 0.0106, "step": 16960 }, { "epoch": 12.32, "grad_norm": 0.19767874479293823, "learning_rate": 2.132252077785847e-05, "loss": 0.0129, "step": 16970 }, { "epoch": 12.33, "grad_norm": 0.006283420603722334, "learning_rate": 2.130234809973372e-05, "loss": 0.0035, "step": 16980 }, { "epoch": 12.33, "grad_norm": 0.002427824307233095, "learning_rate": 2.1282175421608975e-05, "loss": 0.0032, "step": 16990 }, { "epoch": 12.34, "grad_norm": 0.07163175940513611, "learning_rate": 2.1262002743484226e-05, "loss": 0.0042, "step": 17000 }, { "epoch": 12.35, "grad_norm": 0.060925085097551346, "learning_rate": 2.1241830065359477e-05, "loss": 0.0082, "step": 17010 }, { "epoch": 12.36, "grad_norm": 0.02180366963148117, "learning_rate": 2.122165738723473e-05, "loss": 0.0035, "step": 17020 }, { "epoch": 12.36, "grad_norm": 11.230984687805176, "learning_rate": 2.1201484709109983e-05, "loss": 0.0131, "step": 17030 }, { "epoch": 12.37, "grad_norm": 0.0020128083415329456, "learning_rate": 2.1181312030985234e-05, "loss": 0.002, "step": 17040 }, { "epoch": 12.38, "grad_norm": 18.336151123046875, "learning_rate": 2.1161139352860488e-05, "loss": 0.0039, "step": 17050 }, { "epoch": 12.38, "grad_norm": 0.0012726852437481284, "learning_rate": 2.114096667473574e-05, "loss": 0.0022, "step": 17060 }, { "epoch": 12.39, "grad_norm": 0.005078029818832874, "learning_rate": 2.112079399661099e-05, "loss": 0.0, "step": 17070 }, { "epoch": 12.4, "grad_norm": 0.0027781687676906586, "learning_rate": 2.1100621318486245e-05, "loss": 0.0038, "step": 17080 }, { "epoch": 12.41, "grad_norm": 0.002532773185521364, "learning_rate": 2.1080448640361496e-05, "loss": 0.0079, "step": 17090 }, { "epoch": 12.41, "grad_norm": 0.006265075411647558, "learning_rate": 2.1060275962236747e-05, "loss": 0.007, "step": 17100 }, { "epoch": 12.42, "grad_norm": 14.577162742614746, "learning_rate": 2.1040103284112e-05, "loss": 0.007, "step": 17110 }, { "epoch": 12.43, "grad_norm": 0.0002733594155870378, "learning_rate": 2.101993060598725e-05, "loss": 0.0069, "step": 17120 }, { "epoch": 12.44, "grad_norm": 0.015362723730504513, "learning_rate": 2.0999757927862504e-05, "loss": 0.0023, "step": 17130 }, { "epoch": 12.44, "grad_norm": 0.17776289582252502, "learning_rate": 2.0979585249737758e-05, "loss": 0.0211, "step": 17140 }, { "epoch": 12.45, "grad_norm": 0.10553798824548721, "learning_rate": 2.0959412571613006e-05, "loss": 0.0032, "step": 17150 }, { "epoch": 12.46, "grad_norm": 2.3914291858673096, "learning_rate": 2.093923989348826e-05, "loss": 0.0055, "step": 17160 }, { "epoch": 12.46, "grad_norm": 0.013143391348421574, "learning_rate": 2.0919067215363515e-05, "loss": 0.0106, "step": 17170 }, { "epoch": 12.47, "grad_norm": 0.07704094797372818, "learning_rate": 2.0898894537238762e-05, "loss": 0.0082, "step": 17180 }, { "epoch": 12.48, "grad_norm": 0.0821012333035469, "learning_rate": 2.0878721859114017e-05, "loss": 0.0055, "step": 17190 }, { "epoch": 12.49, "grad_norm": 0.0025505193043500185, "learning_rate": 2.085854918098927e-05, "loss": 0.0074, "step": 17200 }, { "epoch": 12.49, "grad_norm": 0.154500812292099, "learning_rate": 2.083837650286452e-05, "loss": 0.0037, "step": 17210 }, { "epoch": 12.5, "grad_norm": 0.0007235849625431001, "learning_rate": 2.0818203824739773e-05, "loss": 0.0025, "step": 17220 }, { "epoch": 12.51, "grad_norm": 0.10713616758584976, "learning_rate": 2.0798031146615028e-05, "loss": 0.0096, "step": 17230 }, { "epoch": 12.52, "grad_norm": 0.00021158994059078395, "learning_rate": 2.0777858468490276e-05, "loss": 0.0017, "step": 17240 }, { "epoch": 12.52, "grad_norm": 0.00012255563342478126, "learning_rate": 2.075768579036553e-05, "loss": 0.0015, "step": 17250 }, { "epoch": 12.53, "grad_norm": 0.04050791636109352, "learning_rate": 2.0737513112240785e-05, "loss": 0.0009, "step": 17260 }, { "epoch": 12.54, "grad_norm": 0.12903136014938354, "learning_rate": 2.0717340434116032e-05, "loss": 0.0036, "step": 17270 }, { "epoch": 12.54, "grad_norm": 0.0034476914443075657, "learning_rate": 2.0697167755991287e-05, "loss": 0.014, "step": 17280 }, { "epoch": 12.55, "grad_norm": 1.9991146326065063, "learning_rate": 2.0676995077866538e-05, "loss": 0.0057, "step": 17290 }, { "epoch": 12.56, "grad_norm": 0.5727086067199707, "learning_rate": 2.065682239974179e-05, "loss": 0.0048, "step": 17300 }, { "epoch": 12.57, "grad_norm": 0.027912188321352005, "learning_rate": 2.0636649721617043e-05, "loss": 0.0036, "step": 17310 }, { "epoch": 12.57, "grad_norm": 0.000993537250906229, "learning_rate": 2.0616477043492294e-05, "loss": 0.0001, "step": 17320 }, { "epoch": 12.58, "grad_norm": 0.00026250266819261014, "learning_rate": 2.0596304365367546e-05, "loss": 0.0097, "step": 17330 }, { "epoch": 12.59, "grad_norm": 0.00026831813738681376, "learning_rate": 2.05761316872428e-05, "loss": 0.0051, "step": 17340 }, { "epoch": 12.6, "grad_norm": 0.0001645336305955425, "learning_rate": 2.055595900911805e-05, "loss": 0.0054, "step": 17350 }, { "epoch": 12.6, "grad_norm": 0.0002212459803558886, "learning_rate": 2.0535786330993302e-05, "loss": 0.0276, "step": 17360 }, { "epoch": 12.61, "grad_norm": 0.002810519188642502, "learning_rate": 2.0515613652868557e-05, "loss": 0.0324, "step": 17370 }, { "epoch": 12.62, "grad_norm": 0.0005311826826073229, "learning_rate": 2.0495440974743808e-05, "loss": 0.0031, "step": 17380 }, { "epoch": 12.62, "grad_norm": 0.14589981734752655, "learning_rate": 2.047526829661906e-05, "loss": 0.0081, "step": 17390 }, { "epoch": 12.63, "grad_norm": 0.0009502907050773501, "learning_rate": 2.0455095618494313e-05, "loss": 0.0058, "step": 17400 }, { "epoch": 12.64, "grad_norm": 0.054477520287036896, "learning_rate": 2.0434922940369564e-05, "loss": 0.0018, "step": 17410 }, { "epoch": 12.65, "grad_norm": 0.061193205416202545, "learning_rate": 2.0414750262244815e-05, "loss": 0.0022, "step": 17420 }, { "epoch": 12.65, "grad_norm": 1.0594874620437622, "learning_rate": 2.039457758412007e-05, "loss": 0.0064, "step": 17430 }, { "epoch": 12.66, "grad_norm": 0.0070306709967553616, "learning_rate": 2.037440490599532e-05, "loss": 0.0019, "step": 17440 }, { "epoch": 12.67, "grad_norm": 0.0007211442571133375, "learning_rate": 2.0354232227870572e-05, "loss": 0.0055, "step": 17450 }, { "epoch": 12.68, "grad_norm": 0.9333264231681824, "learning_rate": 2.0334059549745826e-05, "loss": 0.0186, "step": 17460 }, { "epoch": 12.68, "grad_norm": 0.0005025692516937852, "learning_rate": 2.0313886871621078e-05, "loss": 0.0021, "step": 17470 }, { "epoch": 12.69, "grad_norm": 0.002434327732771635, "learning_rate": 2.029371419349633e-05, "loss": 0.0042, "step": 17480 }, { "epoch": 12.7, "grad_norm": 0.030645696446299553, "learning_rate": 2.0273541515371583e-05, "loss": 0.0039, "step": 17490 }, { "epoch": 12.7, "grad_norm": 0.006995252333581448, "learning_rate": 2.0253368837246834e-05, "loss": 0.0182, "step": 17500 }, { "epoch": 12.71, "grad_norm": 36.959693908691406, "learning_rate": 2.0233196159122085e-05, "loss": 0.0159, "step": 17510 }, { "epoch": 12.72, "grad_norm": 0.05816148966550827, "learning_rate": 2.021302348099734e-05, "loss": 0.0136, "step": 17520 }, { "epoch": 12.73, "grad_norm": 0.4665600061416626, "learning_rate": 2.019285080287259e-05, "loss": 0.0167, "step": 17530 }, { "epoch": 12.73, "grad_norm": 0.008300243876874447, "learning_rate": 2.0172678124747842e-05, "loss": 0.0069, "step": 17540 }, { "epoch": 12.74, "grad_norm": 0.026556752622127533, "learning_rate": 2.0152505446623093e-05, "loss": 0.012, "step": 17550 }, { "epoch": 12.75, "grad_norm": 0.00357626099139452, "learning_rate": 2.0132332768498347e-05, "loss": 0.0114, "step": 17560 }, { "epoch": 12.75, "grad_norm": 0.22881974279880524, "learning_rate": 2.01121600903736e-05, "loss": 0.0039, "step": 17570 }, { "epoch": 12.76, "grad_norm": 0.1387658715248108, "learning_rate": 2.009198741224885e-05, "loss": 0.001, "step": 17580 }, { "epoch": 12.77, "grad_norm": 0.0008824353571981192, "learning_rate": 2.0071814734124104e-05, "loss": 0.0039, "step": 17590 }, { "epoch": 12.78, "grad_norm": 39.46518325805664, "learning_rate": 2.0051642055999355e-05, "loss": 0.0112, "step": 17600 }, { "epoch": 12.78, "grad_norm": 0.006979393772780895, "learning_rate": 2.0031469377874606e-05, "loss": 0.0065, "step": 17610 }, { "epoch": 12.79, "grad_norm": 0.0024195117875933647, "learning_rate": 2.001129669974986e-05, "loss": 0.0006, "step": 17620 }, { "epoch": 12.8, "grad_norm": 0.0011451839236542583, "learning_rate": 1.9991124021625112e-05, "loss": 0.0136, "step": 17630 }, { "epoch": 12.81, "grad_norm": 0.06088784337043762, "learning_rate": 1.9970951343500363e-05, "loss": 0.0054, "step": 17640 }, { "epoch": 12.81, "grad_norm": 0.0037643101532012224, "learning_rate": 1.9950778665375617e-05, "loss": 0.0045, "step": 17650 }, { "epoch": 12.82, "grad_norm": 0.06409800797700882, "learning_rate": 1.993060598725087e-05, "loss": 0.0015, "step": 17660 }, { "epoch": 12.83, "grad_norm": 0.010507245548069477, "learning_rate": 1.991043330912612e-05, "loss": 0.006, "step": 17670 }, { "epoch": 12.83, "grad_norm": 3.360480785369873, "learning_rate": 1.9890260631001374e-05, "loss": 0.0016, "step": 17680 }, { "epoch": 12.84, "grad_norm": 0.10210923850536346, "learning_rate": 1.9870087952876625e-05, "loss": 0.0131, "step": 17690 }, { "epoch": 12.85, "grad_norm": 0.00693171750754118, "learning_rate": 1.9849915274751876e-05, "loss": 0.0089, "step": 17700 }, { "epoch": 12.86, "grad_norm": 0.0036455222871154547, "learning_rate": 1.982974259662713e-05, "loss": 0.006, "step": 17710 }, { "epoch": 12.86, "grad_norm": 16.66859245300293, "learning_rate": 1.980956991850238e-05, "loss": 0.0193, "step": 17720 }, { "epoch": 12.87, "grad_norm": 0.0002460737305227667, "learning_rate": 1.9789397240377633e-05, "loss": 0.0047, "step": 17730 }, { "epoch": 12.88, "grad_norm": 0.002102719387039542, "learning_rate": 1.9769224562252887e-05, "loss": 0.0202, "step": 17740 }, { "epoch": 12.89, "grad_norm": 27.401649475097656, "learning_rate": 1.9749051884128138e-05, "loss": 0.0046, "step": 17750 }, { "epoch": 12.89, "grad_norm": 0.007228259928524494, "learning_rate": 1.972887920600339e-05, "loss": 0.0047, "step": 17760 }, { "epoch": 12.9, "grad_norm": 0.25575822591781616, "learning_rate": 1.9708706527878644e-05, "loss": 0.0009, "step": 17770 }, { "epoch": 12.91, "grad_norm": 0.9766779541969299, "learning_rate": 1.968853384975389e-05, "loss": 0.0113, "step": 17780 }, { "epoch": 12.91, "grad_norm": 0.07483760267496109, "learning_rate": 1.9668361171629146e-05, "loss": 0.0107, "step": 17790 }, { "epoch": 12.92, "grad_norm": 0.0007129976293072104, "learning_rate": 1.96481884935044e-05, "loss": 0.0118, "step": 17800 }, { "epoch": 12.93, "grad_norm": 0.0688149631023407, "learning_rate": 1.9628015815379648e-05, "loss": 0.0131, "step": 17810 }, { "epoch": 12.94, "grad_norm": 0.04838823899626732, "learning_rate": 1.9607843137254903e-05, "loss": 0.0012, "step": 17820 }, { "epoch": 12.94, "grad_norm": 0.0014652046374976635, "learning_rate": 1.9587670459130157e-05, "loss": 0.0106, "step": 17830 }, { "epoch": 12.95, "grad_norm": 0.0027166057843714952, "learning_rate": 1.9567497781005405e-05, "loss": 0.007, "step": 17840 }, { "epoch": 12.96, "grad_norm": 0.0015533966943621635, "learning_rate": 1.954732510288066e-05, "loss": 0.0032, "step": 17850 }, { "epoch": 12.97, "grad_norm": 0.05134255439043045, "learning_rate": 1.9527152424755914e-05, "loss": 0.0028, "step": 17860 }, { "epoch": 12.97, "grad_norm": 0.10609173774719238, "learning_rate": 1.950697974663116e-05, "loss": 0.0023, "step": 17870 }, { "epoch": 12.98, "grad_norm": 0.17367728054523468, "learning_rate": 1.9486807068506416e-05, "loss": 0.0087, "step": 17880 }, { "epoch": 12.99, "grad_norm": 0.16967636346817017, "learning_rate": 1.946663439038167e-05, "loss": 0.0098, "step": 17890 }, { "epoch": 12.99, "grad_norm": 0.059492193162441254, "learning_rate": 1.9446461712256918e-05, "loss": 0.0058, "step": 17900 }, { "epoch": 13.0, "eval_accuracy": 0.9983096610246404, "eval_f1": 0.9960987615531642, "eval_loss": 0.0034948072861880064, "eval_precision": 0.9992120607238536, "eval_recall": 0.9930048026727918, "eval_roc_auc": 0.9999857662326282, "eval_runtime": 385.943, "eval_samples_per_second": 228.396, "eval_steps_per_second": 14.277, "step": 17907 }, { "epoch": 13.0, "grad_norm": 0.0003739891981240362, "learning_rate": 1.9426289034132172e-05, "loss": 0.013, "step": 17910 }, { "epoch": 13.01, "grad_norm": 0.0007324207108467817, "learning_rate": 1.9406116356007427e-05, "loss": 0.0059, "step": 17920 }, { "epoch": 13.02, "grad_norm": 0.20080474019050598, "learning_rate": 1.9385943677882675e-05, "loss": 0.0055, "step": 17930 }, { "epoch": 13.02, "grad_norm": 0.1485166698694229, "learning_rate": 1.936577099975793e-05, "loss": 0.0103, "step": 17940 }, { "epoch": 13.03, "grad_norm": 0.0007888617110438645, "learning_rate": 1.9345598321633183e-05, "loss": 0.0026, "step": 17950 }, { "epoch": 13.04, "grad_norm": 1.7149608135223389, "learning_rate": 1.932542564350843e-05, "loss": 0.0041, "step": 17960 }, { "epoch": 13.05, "grad_norm": 0.19555199146270752, "learning_rate": 1.9305252965383686e-05, "loss": 0.0126, "step": 17970 }, { "epoch": 13.05, "grad_norm": 0.0028395382687449455, "learning_rate": 1.928508028725894e-05, "loss": 0.0007, "step": 17980 }, { "epoch": 13.06, "grad_norm": 0.0003235260955989361, "learning_rate": 1.9264907609134188e-05, "loss": 0.0038, "step": 17990 }, { "epoch": 13.07, "grad_norm": 0.010898897424340248, "learning_rate": 1.9244734931009442e-05, "loss": 0.0009, "step": 18000 }, { "epoch": 13.07, "grad_norm": 0.29816633462905884, "learning_rate": 1.9224562252884697e-05, "loss": 0.0012, "step": 18010 }, { "epoch": 13.08, "grad_norm": 0.013279824517667294, "learning_rate": 1.9204389574759944e-05, "loss": 0.0006, "step": 18020 }, { "epoch": 13.09, "grad_norm": 0.0002779340138658881, "learning_rate": 1.91842168966352e-05, "loss": 0.0179, "step": 18030 }, { "epoch": 13.1, "grad_norm": 30.59058952331543, "learning_rate": 1.916404421851045e-05, "loss": 0.0066, "step": 18040 }, { "epoch": 13.1, "grad_norm": 0.1223394125699997, "learning_rate": 1.91438715403857e-05, "loss": 0.0031, "step": 18050 }, { "epoch": 13.11, "grad_norm": 0.0008338306797668338, "learning_rate": 1.9123698862260955e-05, "loss": 0.0025, "step": 18060 }, { "epoch": 13.12, "grad_norm": 0.012727318331599236, "learning_rate": 1.9103526184136207e-05, "loss": 0.01, "step": 18070 }, { "epoch": 13.13, "grad_norm": 0.1314314305782318, "learning_rate": 1.9083353506011458e-05, "loss": 0.0077, "step": 18080 }, { "epoch": 13.13, "grad_norm": 0.011394386179745197, "learning_rate": 1.9063180827886712e-05, "loss": 0.0089, "step": 18090 }, { "epoch": 13.14, "grad_norm": 0.049658820033073425, "learning_rate": 1.9043008149761963e-05, "loss": 0.0001, "step": 18100 }, { "epoch": 13.15, "grad_norm": 0.021471910178661346, "learning_rate": 1.9022835471637214e-05, "loss": 0.0036, "step": 18110 }, { "epoch": 13.15, "grad_norm": 8.542447090148926, "learning_rate": 1.900266279351247e-05, "loss": 0.0075, "step": 18120 }, { "epoch": 13.16, "grad_norm": 3.939650774002075, "learning_rate": 1.898249011538772e-05, "loss": 0.0084, "step": 18130 }, { "epoch": 13.17, "grad_norm": 0.10803334414958954, "learning_rate": 1.896231743726297e-05, "loss": 0.0033, "step": 18140 }, { "epoch": 13.18, "grad_norm": 0.0016341025475412607, "learning_rate": 1.8942144759138225e-05, "loss": 0.0006, "step": 18150 }, { "epoch": 13.18, "grad_norm": 0.001856721006333828, "learning_rate": 1.8921972081013476e-05, "loss": 0.001, "step": 18160 }, { "epoch": 13.19, "grad_norm": 0.0011930714827030897, "learning_rate": 1.8901799402888727e-05, "loss": 0.0006, "step": 18170 }, { "epoch": 13.2, "grad_norm": 0.0006116937729530036, "learning_rate": 1.8881626724763982e-05, "loss": 0.0159, "step": 18180 }, { "epoch": 13.21, "grad_norm": 0.0016800108132883906, "learning_rate": 1.8861454046639233e-05, "loss": 0.014, "step": 18190 }, { "epoch": 13.21, "grad_norm": 0.23176616430282593, "learning_rate": 1.8841281368514484e-05, "loss": 0.0028, "step": 18200 }, { "epoch": 13.22, "grad_norm": 0.05328686162829399, "learning_rate": 1.882110869038974e-05, "loss": 0.0011, "step": 18210 }, { "epoch": 13.23, "grad_norm": 0.014226214028894901, "learning_rate": 1.880093601226499e-05, "loss": 0.0037, "step": 18220 }, { "epoch": 13.23, "grad_norm": 0.002094144467264414, "learning_rate": 1.878076333414024e-05, "loss": 0.0023, "step": 18230 }, { "epoch": 13.24, "grad_norm": 0.004210632294416428, "learning_rate": 1.8760590656015495e-05, "loss": 0.0125, "step": 18240 }, { "epoch": 13.25, "grad_norm": 0.44930320978164673, "learning_rate": 1.8740417977890746e-05, "loss": 0.0025, "step": 18250 }, { "epoch": 13.26, "grad_norm": 0.7001730799674988, "learning_rate": 1.8720245299765997e-05, "loss": 0.0127, "step": 18260 }, { "epoch": 13.26, "grad_norm": 0.019284186884760857, "learning_rate": 1.870007262164125e-05, "loss": 0.0021, "step": 18270 }, { "epoch": 13.27, "grad_norm": 0.0007997555076144636, "learning_rate": 1.8679899943516503e-05, "loss": 0.008, "step": 18280 }, { "epoch": 13.28, "grad_norm": 0.004993034061044455, "learning_rate": 1.8659727265391754e-05, "loss": 0.005, "step": 18290 }, { "epoch": 13.28, "grad_norm": 0.0015475050313398242, "learning_rate": 1.8639554587267005e-05, "loss": 0.0113, "step": 18300 }, { "epoch": 13.29, "grad_norm": 0.0004995432100258768, "learning_rate": 1.861938190914226e-05, "loss": 0.0014, "step": 18310 }, { "epoch": 13.3, "grad_norm": 0.0007782830507494509, "learning_rate": 1.859920923101751e-05, "loss": 0.0044, "step": 18320 }, { "epoch": 13.31, "grad_norm": 0.043515272438526154, "learning_rate": 1.857903655289276e-05, "loss": 0.0028, "step": 18330 }, { "epoch": 13.31, "grad_norm": 0.2453761100769043, "learning_rate": 1.8558863874768016e-05, "loss": 0.0087, "step": 18340 }, { "epoch": 13.32, "grad_norm": 0.00025056241429410875, "learning_rate": 1.8538691196643267e-05, "loss": 0.0115, "step": 18350 }, { "epoch": 13.33, "grad_norm": 0.003255044110119343, "learning_rate": 1.8518518518518518e-05, "loss": 0.0026, "step": 18360 }, { "epoch": 13.34, "grad_norm": 0.0290953628718853, "learning_rate": 1.8498345840393773e-05, "loss": 0.0037, "step": 18370 }, { "epoch": 13.34, "grad_norm": 0.0034771724604070187, "learning_rate": 1.8478173162269024e-05, "loss": 0.0122, "step": 18380 }, { "epoch": 13.35, "grad_norm": 0.001946158241480589, "learning_rate": 1.8458000484144275e-05, "loss": 0.0146, "step": 18390 }, { "epoch": 13.36, "grad_norm": 0.0008432241738773882, "learning_rate": 1.843782780601953e-05, "loss": 0.0098, "step": 18400 }, { "epoch": 13.36, "grad_norm": 0.006002933252602816, "learning_rate": 1.841765512789478e-05, "loss": 0.0354, "step": 18410 }, { "epoch": 13.37, "grad_norm": 0.0009837823454290628, "learning_rate": 1.839748244977003e-05, "loss": 0.0053, "step": 18420 }, { "epoch": 13.38, "grad_norm": 0.21960854530334473, "learning_rate": 1.8377309771645286e-05, "loss": 0.0066, "step": 18430 }, { "epoch": 13.39, "grad_norm": 0.0019889490213245153, "learning_rate": 1.8357137093520537e-05, "loss": 0.0036, "step": 18440 }, { "epoch": 13.39, "grad_norm": 0.02662692405283451, "learning_rate": 1.8336964415395788e-05, "loss": 0.0007, "step": 18450 }, { "epoch": 13.4, "grad_norm": 0.0018065626500174403, "learning_rate": 1.8316791737271043e-05, "loss": 0.0056, "step": 18460 }, { "epoch": 13.41, "grad_norm": 0.061746492981910706, "learning_rate": 1.8296619059146294e-05, "loss": 0.0031, "step": 18470 }, { "epoch": 13.42, "grad_norm": 0.027934031561017036, "learning_rate": 1.8276446381021545e-05, "loss": 0.0003, "step": 18480 }, { "epoch": 13.42, "grad_norm": 4.120459079742432, "learning_rate": 1.82562737028968e-05, "loss": 0.02, "step": 18490 }, { "epoch": 13.43, "grad_norm": 0.0340251624584198, "learning_rate": 1.823610102477205e-05, "loss": 0.0047, "step": 18500 }, { "epoch": 13.44, "grad_norm": 0.11251964420080185, "learning_rate": 1.82159283466473e-05, "loss": 0.0012, "step": 18510 }, { "epoch": 13.44, "grad_norm": 0.048865534365177155, "learning_rate": 1.8195755668522556e-05, "loss": 0.004, "step": 18520 }, { "epoch": 13.45, "grad_norm": 0.0008931795600801706, "learning_rate": 1.8175582990397804e-05, "loss": 0.0104, "step": 18530 }, { "epoch": 13.46, "grad_norm": 0.00030427967431023717, "learning_rate": 1.8155410312273058e-05, "loss": 0.0124, "step": 18540 }, { "epoch": 13.47, "grad_norm": 14.171956062316895, "learning_rate": 1.8135237634148312e-05, "loss": 0.0032, "step": 18550 }, { "epoch": 13.47, "grad_norm": 0.10234571248292923, "learning_rate": 1.811506495602356e-05, "loss": 0.0089, "step": 18560 }, { "epoch": 13.48, "grad_norm": 0.15911656618118286, "learning_rate": 1.8094892277898815e-05, "loss": 0.0044, "step": 18570 }, { "epoch": 13.49, "grad_norm": 0.0011252342956140637, "learning_rate": 1.807471959977407e-05, "loss": 0.0036, "step": 18580 }, { "epoch": 13.5, "grad_norm": 0.00042991613736376166, "learning_rate": 1.8054546921649317e-05, "loss": 0.0082, "step": 18590 }, { "epoch": 13.5, "grad_norm": 0.0018707435810938478, "learning_rate": 1.803437424352457e-05, "loss": 0.0062, "step": 18600 }, { "epoch": 13.51, "grad_norm": 30.47728729248047, "learning_rate": 1.8014201565399826e-05, "loss": 0.0159, "step": 18610 }, { "epoch": 13.52, "grad_norm": 0.1323844939470291, "learning_rate": 1.7994028887275073e-05, "loss": 0.0087, "step": 18620 }, { "epoch": 13.52, "grad_norm": 0.0003529054229147732, "learning_rate": 1.7973856209150328e-05, "loss": 0.0002, "step": 18630 }, { "epoch": 13.53, "grad_norm": 0.02061094157397747, "learning_rate": 1.7953683531025582e-05, "loss": 0.0053, "step": 18640 }, { "epoch": 13.54, "grad_norm": 0.015290017239749432, "learning_rate": 1.793351085290083e-05, "loss": 0.0013, "step": 18650 }, { "epoch": 13.55, "grad_norm": 0.0006153315771371126, "learning_rate": 1.7913338174776084e-05, "loss": 0.0014, "step": 18660 }, { "epoch": 13.55, "grad_norm": 0.0002869318414013833, "learning_rate": 1.789316549665134e-05, "loss": 0.0099, "step": 18670 }, { "epoch": 13.56, "grad_norm": 0.000607367604970932, "learning_rate": 1.7872992818526587e-05, "loss": 0.0034, "step": 18680 }, { "epoch": 13.57, "grad_norm": 0.13318461179733276, "learning_rate": 1.785282014040184e-05, "loss": 0.0033, "step": 18690 }, { "epoch": 13.58, "grad_norm": 0.0001578326482558623, "learning_rate": 1.7832647462277096e-05, "loss": 0.0082, "step": 18700 }, { "epoch": 13.58, "grad_norm": 0.037437982857227325, "learning_rate": 1.7812474784152343e-05, "loss": 0.0061, "step": 18710 }, { "epoch": 13.59, "grad_norm": 3.765977144241333, "learning_rate": 1.7792302106027598e-05, "loss": 0.0144, "step": 18720 }, { "epoch": 13.6, "grad_norm": 0.0014689884847030044, "learning_rate": 1.7772129427902852e-05, "loss": 0.0011, "step": 18730 }, { "epoch": 13.6, "grad_norm": 0.05242501571774483, "learning_rate": 1.77519567497781e-05, "loss": 0.0048, "step": 18740 }, { "epoch": 13.61, "grad_norm": 0.03251238167285919, "learning_rate": 1.7731784071653354e-05, "loss": 0.0041, "step": 18750 }, { "epoch": 13.62, "grad_norm": 0.0015125583158805966, "learning_rate": 1.7711611393528605e-05, "loss": 0.0069, "step": 18760 }, { "epoch": 13.63, "grad_norm": 0.0005884718266315758, "learning_rate": 1.7691438715403856e-05, "loss": 0.0047, "step": 18770 }, { "epoch": 13.63, "grad_norm": 0.17881019413471222, "learning_rate": 1.767126603727911e-05, "loss": 0.0029, "step": 18780 }, { "epoch": 13.64, "grad_norm": 0.1586664319038391, "learning_rate": 1.7651093359154362e-05, "loss": 0.0007, "step": 18790 }, { "epoch": 13.65, "grad_norm": 0.004922006744891405, "learning_rate": 1.7630920681029613e-05, "loss": 0.0035, "step": 18800 }, { "epoch": 13.66, "grad_norm": 1.1970666646957397, "learning_rate": 1.7610748002904868e-05, "loss": 0.0009, "step": 18810 }, { "epoch": 13.66, "grad_norm": 0.04897474870085716, "learning_rate": 1.759057532478012e-05, "loss": 0.0054, "step": 18820 }, { "epoch": 13.67, "grad_norm": 0.000665748433675617, "learning_rate": 1.757040264665537e-05, "loss": 0.0049, "step": 18830 }, { "epoch": 13.68, "grad_norm": 0.11206013709306717, "learning_rate": 1.7550229968530624e-05, "loss": 0.0063, "step": 18840 }, { "epoch": 13.68, "grad_norm": 0.0007113315514288843, "learning_rate": 1.7530057290405875e-05, "loss": 0.0205, "step": 18850 }, { "epoch": 13.69, "grad_norm": 0.0034957346506416798, "learning_rate": 1.7509884612281126e-05, "loss": 0.0025, "step": 18860 }, { "epoch": 13.7, "grad_norm": 0.0020177995320409536, "learning_rate": 1.748971193415638e-05, "loss": 0.0015, "step": 18870 }, { "epoch": 13.71, "grad_norm": 6.311367034912109, "learning_rate": 1.7469539256031632e-05, "loss": 0.0022, "step": 18880 }, { "epoch": 13.71, "grad_norm": 0.0005880764219909906, "learning_rate": 1.7449366577906883e-05, "loss": 0.0015, "step": 18890 }, { "epoch": 13.72, "grad_norm": 0.10973332077264786, "learning_rate": 1.7429193899782137e-05, "loss": 0.0182, "step": 18900 }, { "epoch": 13.73, "grad_norm": 0.000982865341939032, "learning_rate": 1.740902122165739e-05, "loss": 0.0126, "step": 18910 }, { "epoch": 13.74, "grad_norm": 0.509669303894043, "learning_rate": 1.738884854353264e-05, "loss": 0.0124, "step": 18920 }, { "epoch": 13.74, "grad_norm": 0.05541288107633591, "learning_rate": 1.7368675865407894e-05, "loss": 0.0326, "step": 18930 }, { "epoch": 13.75, "grad_norm": 0.006566929165273905, "learning_rate": 1.7348503187283145e-05, "loss": 0.0256, "step": 18940 }, { "epoch": 13.76, "grad_norm": 1.1299678087234497, "learning_rate": 1.7328330509158396e-05, "loss": 0.014, "step": 18950 }, { "epoch": 13.76, "grad_norm": 0.0010074771707877517, "learning_rate": 1.730815783103365e-05, "loss": 0.0024, "step": 18960 }, { "epoch": 13.77, "grad_norm": 0.0006316117942333221, "learning_rate": 1.7287985152908902e-05, "loss": 0.0, "step": 18970 }, { "epoch": 13.78, "grad_norm": 21.051273345947266, "learning_rate": 1.7267812474784153e-05, "loss": 0.015, "step": 18980 }, { "epoch": 13.79, "grad_norm": 0.0021583428606390953, "learning_rate": 1.7247639796659407e-05, "loss": 0.0045, "step": 18990 }, { "epoch": 13.79, "grad_norm": 0.13025076687335968, "learning_rate": 1.722746711853466e-05, "loss": 0.0054, "step": 19000 }, { "epoch": 13.8, "grad_norm": 0.06940227001905441, "learning_rate": 1.720729444040991e-05, "loss": 0.0279, "step": 19010 }, { "epoch": 13.81, "grad_norm": 1.8406766653060913, "learning_rate": 1.718712176228516e-05, "loss": 0.0059, "step": 19020 }, { "epoch": 13.81, "grad_norm": 0.36772218346595764, "learning_rate": 1.7166949084160415e-05, "loss": 0.004, "step": 19030 }, { "epoch": 13.82, "grad_norm": 0.2510613799095154, "learning_rate": 1.7146776406035666e-05, "loss": 0.0046, "step": 19040 }, { "epoch": 13.83, "grad_norm": 0.0453202947974205, "learning_rate": 1.7126603727910917e-05, "loss": 0.0037, "step": 19050 }, { "epoch": 13.84, "grad_norm": 0.002687858883291483, "learning_rate": 1.710643104978617e-05, "loss": 0.0062, "step": 19060 }, { "epoch": 13.84, "grad_norm": 0.004243039526045322, "learning_rate": 1.7086258371661423e-05, "loss": 0.0056, "step": 19070 }, { "epoch": 13.85, "grad_norm": 0.008846227079629898, "learning_rate": 1.7066085693536674e-05, "loss": 0.0083, "step": 19080 }, { "epoch": 13.86, "grad_norm": 1.1003198623657227, "learning_rate": 1.7045913015411928e-05, "loss": 0.0051, "step": 19090 }, { "epoch": 13.87, "grad_norm": 0.08140759915113449, "learning_rate": 1.702574033728718e-05, "loss": 0.017, "step": 19100 }, { "epoch": 13.87, "grad_norm": 0.0035266373306512833, "learning_rate": 1.700556765916243e-05, "loss": 0.0051, "step": 19110 }, { "epoch": 13.88, "grad_norm": 0.0657566487789154, "learning_rate": 1.6985394981037685e-05, "loss": 0.0026, "step": 19120 }, { "epoch": 13.89, "grad_norm": 0.0039032178465276957, "learning_rate": 1.6965222302912936e-05, "loss": 0.0043, "step": 19130 }, { "epoch": 13.89, "grad_norm": 0.11626137048006058, "learning_rate": 1.6945049624788187e-05, "loss": 0.0046, "step": 19140 }, { "epoch": 13.9, "grad_norm": 0.21265415847301483, "learning_rate": 1.692487694666344e-05, "loss": 0.0026, "step": 19150 }, { "epoch": 13.91, "grad_norm": 0.26542073488235474, "learning_rate": 1.6904704268538693e-05, "loss": 0.0012, "step": 19160 }, { "epoch": 13.92, "grad_norm": 0.0003108033852186054, "learning_rate": 1.6884531590413944e-05, "loss": 0.0054, "step": 19170 }, { "epoch": 13.92, "grad_norm": 0.012170134112238884, "learning_rate": 1.6864358912289198e-05, "loss": 0.0046, "step": 19180 }, { "epoch": 13.93, "grad_norm": 0.17962482571601868, "learning_rate": 1.684418623416445e-05, "loss": 0.0032, "step": 19190 }, { "epoch": 13.94, "grad_norm": 0.0008012820617295802, "learning_rate": 1.68240135560397e-05, "loss": 0.0006, "step": 19200 }, { "epoch": 13.95, "grad_norm": 0.0011188456555828452, "learning_rate": 1.6803840877914955e-05, "loss": 0.012, "step": 19210 }, { "epoch": 13.95, "grad_norm": 0.0029843682423233986, "learning_rate": 1.6783668199790206e-05, "loss": 0.0084, "step": 19220 }, { "epoch": 13.96, "grad_norm": 0.006473259534686804, "learning_rate": 1.6763495521665457e-05, "loss": 0.0023, "step": 19230 }, { "epoch": 13.97, "grad_norm": 0.032545410096645355, "learning_rate": 1.674332284354071e-05, "loss": 0.0076, "step": 19240 }, { "epoch": 13.97, "grad_norm": 0.024446191266179085, "learning_rate": 1.6723150165415962e-05, "loss": 0.0106, "step": 19250 }, { "epoch": 13.98, "grad_norm": 26.21831512451172, "learning_rate": 1.6702977487291213e-05, "loss": 0.0092, "step": 19260 }, { "epoch": 13.99, "grad_norm": 0.05417324975132942, "learning_rate": 1.6682804809166465e-05, "loss": 0.0044, "step": 19270 }, { "epoch": 14.0, "grad_norm": 0.12691135704517365, "learning_rate": 1.6662632131041716e-05, "loss": 0.0076, "step": 19280 }, { "epoch": 14.0, "eval_accuracy": 0.9981394926714162, "eval_f1": 0.9957034320146712, "eval_loss": 0.004028408322483301, "eval_precision": 0.9994214789102767, "eval_recall": 0.9920129463353519, "eval_roc_auc": 0.9999826737941067, "eval_runtime": 389.171, "eval_samples_per_second": 226.502, "eval_steps_per_second": 14.158, "step": 19285 }, { "epoch": 14.0, "grad_norm": 0.044279515743255615, "learning_rate": 1.664245945291697e-05, "loss": 0.003, "step": 19290 }, { "epoch": 14.01, "grad_norm": 0.061287716031074524, "learning_rate": 1.662228677479222e-05, "loss": 0.0004, "step": 19300 }, { "epoch": 14.02, "grad_norm": 5.015234470367432, "learning_rate": 1.6602114096667472e-05, "loss": 0.0062, "step": 19310 }, { "epoch": 14.03, "grad_norm": 0.03511728346347809, "learning_rate": 1.6581941418542727e-05, "loss": 0.0059, "step": 19320 }, { "epoch": 14.03, "grad_norm": 0.06877407431602478, "learning_rate": 1.6561768740417978e-05, "loss": 0.0008, "step": 19330 }, { "epoch": 14.04, "grad_norm": 0.00669504189863801, "learning_rate": 1.654159606229323e-05, "loss": 0.0002, "step": 19340 }, { "epoch": 14.05, "grad_norm": 1.7849411964416504, "learning_rate": 1.6521423384168483e-05, "loss": 0.0042, "step": 19350 }, { "epoch": 14.05, "grad_norm": 0.0018257235642522573, "learning_rate": 1.6501250706043734e-05, "loss": 0.0029, "step": 19360 }, { "epoch": 14.06, "grad_norm": 0.0007575092604383826, "learning_rate": 1.6481078027918986e-05, "loss": 0.0013, "step": 19370 }, { "epoch": 14.07, "grad_norm": 0.0004143484984524548, "learning_rate": 1.646090534979424e-05, "loss": 0.0048, "step": 19380 }, { "epoch": 14.08, "grad_norm": 12.652482032775879, "learning_rate": 1.644073267166949e-05, "loss": 0.0028, "step": 19390 }, { "epoch": 14.08, "grad_norm": 0.00011446201096987352, "learning_rate": 1.6420559993544742e-05, "loss": 0.0116, "step": 19400 }, { "epoch": 14.09, "grad_norm": 0.002371125388890505, "learning_rate": 1.6400387315419997e-05, "loss": 0.0141, "step": 19410 }, { "epoch": 14.1, "grad_norm": 0.026215413585305214, "learning_rate": 1.6380214637295248e-05, "loss": 0.0033, "step": 19420 }, { "epoch": 14.11, "grad_norm": 0.0014271169202402234, "learning_rate": 1.63600419591705e-05, "loss": 0.0014, "step": 19430 }, { "epoch": 14.11, "grad_norm": 0.00161796307656914, "learning_rate": 1.6339869281045753e-05, "loss": 0.0009, "step": 19440 }, { "epoch": 14.12, "grad_norm": 1.2255878448486328, "learning_rate": 1.6319696602921004e-05, "loss": 0.0022, "step": 19450 }, { "epoch": 14.13, "grad_norm": 0.004346283618360758, "learning_rate": 1.6299523924796255e-05, "loss": 0.0014, "step": 19460 }, { "epoch": 14.13, "grad_norm": 0.0006355916266329587, "learning_rate": 1.627935124667151e-05, "loss": 0.0143, "step": 19470 }, { "epoch": 14.14, "grad_norm": 0.11306034028530121, "learning_rate": 1.625917856854676e-05, "loss": 0.0038, "step": 19480 }, { "epoch": 14.15, "grad_norm": 0.00017098760872613639, "learning_rate": 1.6239005890422012e-05, "loss": 0.0028, "step": 19490 }, { "epoch": 14.16, "grad_norm": 0.18717147409915924, "learning_rate": 1.6218833212297266e-05, "loss": 0.0109, "step": 19500 }, { "epoch": 14.16, "grad_norm": 0.0023012091405689716, "learning_rate": 1.6198660534172518e-05, "loss": 0.0039, "step": 19510 }, { "epoch": 14.17, "grad_norm": 0.00090291682863608, "learning_rate": 1.617848785604777e-05, "loss": 0.0013, "step": 19520 }, { "epoch": 14.18, "grad_norm": 0.37925946712493896, "learning_rate": 1.6158315177923023e-05, "loss": 0.0113, "step": 19530 }, { "epoch": 14.19, "grad_norm": 0.003591743763536215, "learning_rate": 1.6138142499798274e-05, "loss": 0.0042, "step": 19540 }, { "epoch": 14.19, "grad_norm": 0.003399345325306058, "learning_rate": 1.6117969821673525e-05, "loss": 0.0001, "step": 19550 }, { "epoch": 14.2, "grad_norm": 0.006896769627928734, "learning_rate": 1.609779714354878e-05, "loss": 0.0007, "step": 19560 }, { "epoch": 14.21, "grad_norm": 0.01545551884919405, "learning_rate": 1.607762446542403e-05, "loss": 0.0006, "step": 19570 }, { "epoch": 14.21, "grad_norm": 0.08701088279485703, "learning_rate": 1.6057451787299282e-05, "loss": 0.0055, "step": 19580 }, { "epoch": 14.22, "grad_norm": 0.0003549981047399342, "learning_rate": 1.6037279109174536e-05, "loss": 0.003, "step": 19590 }, { "epoch": 14.23, "grad_norm": 0.05535199120640755, "learning_rate": 1.6017106431049787e-05, "loss": 0.0125, "step": 19600 }, { "epoch": 14.24, "grad_norm": 0.004172964952886105, "learning_rate": 1.599693375292504e-05, "loss": 0.0024, "step": 19610 }, { "epoch": 14.24, "grad_norm": 0.018433071672916412, "learning_rate": 1.5976761074800293e-05, "loss": 0.003, "step": 19620 }, { "epoch": 14.25, "grad_norm": 0.00013644646969623864, "learning_rate": 1.5956588396675544e-05, "loss": 0.0007, "step": 19630 }, { "epoch": 14.26, "grad_norm": 0.12272168695926666, "learning_rate": 1.5936415718550795e-05, "loss": 0.0047, "step": 19640 }, { "epoch": 14.26, "grad_norm": 27.803667068481445, "learning_rate": 1.591624304042605e-05, "loss": 0.0137, "step": 19650 }, { "epoch": 14.27, "grad_norm": 3.5240798752056435e-05, "learning_rate": 1.58960703623013e-05, "loss": 0.0028, "step": 19660 }, { "epoch": 14.28, "grad_norm": 0.00028095979359932244, "learning_rate": 1.5875897684176552e-05, "loss": 0.0043, "step": 19670 }, { "epoch": 14.29, "grad_norm": 0.5708059072494507, "learning_rate": 1.5855725006051806e-05, "loss": 0.0063, "step": 19680 }, { "epoch": 14.29, "grad_norm": 0.0016370153753086925, "learning_rate": 1.5835552327927057e-05, "loss": 0.0082, "step": 19690 }, { "epoch": 14.3, "grad_norm": 0.00016567490820307285, "learning_rate": 1.581537964980231e-05, "loss": 0.0108, "step": 19700 }, { "epoch": 14.31, "grad_norm": 0.00043105980148538947, "learning_rate": 1.5795206971677563e-05, "loss": 0.006, "step": 19710 }, { "epoch": 14.32, "grad_norm": 0.00017671240493655205, "learning_rate": 1.5775034293552814e-05, "loss": 0.0112, "step": 19720 }, { "epoch": 14.32, "grad_norm": 1.609751582145691, "learning_rate": 1.5754861615428065e-05, "loss": 0.0028, "step": 19730 }, { "epoch": 14.33, "grad_norm": 0.00035769614623859525, "learning_rate": 1.573468893730332e-05, "loss": 0.0068, "step": 19740 }, { "epoch": 14.34, "grad_norm": 0.2628963589668274, "learning_rate": 1.571451625917857e-05, "loss": 0.0192, "step": 19750 }, { "epoch": 14.34, "grad_norm": 0.03742462769150734, "learning_rate": 1.569434358105382e-05, "loss": 0.0016, "step": 19760 }, { "epoch": 14.35, "grad_norm": 0.000913703057449311, "learning_rate": 1.5674170902929073e-05, "loss": 0.0028, "step": 19770 }, { "epoch": 14.36, "grad_norm": 0.05503058806061745, "learning_rate": 1.5653998224804327e-05, "loss": 0.0028, "step": 19780 }, { "epoch": 14.37, "grad_norm": 0.028899654746055603, "learning_rate": 1.5633825546679578e-05, "loss": 0.0037, "step": 19790 }, { "epoch": 14.37, "grad_norm": 0.000255098711932078, "learning_rate": 1.561365286855483e-05, "loss": 0.0017, "step": 19800 }, { "epoch": 14.38, "grad_norm": 0.008871900849044323, "learning_rate": 1.559348019043008e-05, "loss": 0.0054, "step": 19810 }, { "epoch": 14.39, "grad_norm": 0.030201343819499016, "learning_rate": 1.5573307512305335e-05, "loss": 0.0057, "step": 19820 }, { "epoch": 14.4, "grad_norm": 0.01691788248717785, "learning_rate": 1.5553134834180586e-05, "loss": 0.0031, "step": 19830 }, { "epoch": 14.4, "grad_norm": 0.0881408080458641, "learning_rate": 1.5532962156055837e-05, "loss": 0.0134, "step": 19840 }, { "epoch": 14.41, "grad_norm": 0.0006498016882687807, "learning_rate": 1.551278947793109e-05, "loss": 0.0178, "step": 19850 }, { "epoch": 14.42, "grad_norm": 0.000303228065604344, "learning_rate": 1.5492616799806343e-05, "loss": 0.0159, "step": 19860 }, { "epoch": 14.42, "grad_norm": 0.19211918115615845, "learning_rate": 1.5472444121681594e-05, "loss": 0.0064, "step": 19870 }, { "epoch": 14.43, "grad_norm": 0.0003167142567690462, "learning_rate": 1.5452271443556848e-05, "loss": 0.01, "step": 19880 }, { "epoch": 14.44, "grad_norm": 34.510948181152344, "learning_rate": 1.54320987654321e-05, "loss": 0.0119, "step": 19890 }, { "epoch": 14.45, "grad_norm": 0.06265545636415482, "learning_rate": 1.541192608730735e-05, "loss": 0.0084, "step": 19900 }, { "epoch": 14.45, "grad_norm": 0.0007544682594016194, "learning_rate": 1.5391753409182605e-05, "loss": 0.0006, "step": 19910 }, { "epoch": 14.46, "grad_norm": 1.0826385021209717, "learning_rate": 1.5371580731057856e-05, "loss": 0.0033, "step": 19920 }, { "epoch": 14.47, "grad_norm": 0.031010733917355537, "learning_rate": 1.5351408052933107e-05, "loss": 0.0355, "step": 19930 }, { "epoch": 14.48, "grad_norm": 0.293381005525589, "learning_rate": 1.533123537480836e-05, "loss": 0.0204, "step": 19940 }, { "epoch": 14.48, "grad_norm": 0.003690192475914955, "learning_rate": 1.5311062696683612e-05, "loss": 0.0171, "step": 19950 }, { "epoch": 14.49, "grad_norm": 0.0008720943587832153, "learning_rate": 1.5290890018558863e-05, "loss": 0.0128, "step": 19960 }, { "epoch": 14.5, "grad_norm": 0.05105540528893471, "learning_rate": 1.5270717340434118e-05, "loss": 0.0033, "step": 19970 }, { "epoch": 14.5, "grad_norm": 0.1516324281692505, "learning_rate": 1.5250544662309369e-05, "loss": 0.0063, "step": 19980 }, { "epoch": 14.51, "grad_norm": 0.009380163624882698, "learning_rate": 1.5230371984184622e-05, "loss": 0.0109, "step": 19990 }, { "epoch": 14.52, "grad_norm": 0.0012153387069702148, "learning_rate": 1.5210199306059871e-05, "loss": 0.0022, "step": 20000 }, { "epoch": 14.53, "grad_norm": 0.0027521008159965277, "learning_rate": 1.5190026627935126e-05, "loss": 0.0024, "step": 20010 }, { "epoch": 14.53, "grad_norm": 21.582521438598633, "learning_rate": 1.5169853949810378e-05, "loss": 0.0036, "step": 20020 }, { "epoch": 14.54, "grad_norm": 0.003993968944996595, "learning_rate": 1.5149681271685628e-05, "loss": 0.0126, "step": 20030 }, { "epoch": 14.55, "grad_norm": 0.00026681035524234176, "learning_rate": 1.5129508593560882e-05, "loss": 0.0022, "step": 20040 }, { "epoch": 14.56, "grad_norm": 0.03443112596869469, "learning_rate": 1.5109335915436135e-05, "loss": 0.01, "step": 20050 }, { "epoch": 14.56, "grad_norm": 0.00041420606430619955, "learning_rate": 1.5089163237311384e-05, "loss": 0.0173, "step": 20060 }, { "epoch": 14.57, "grad_norm": 6.245350360870361, "learning_rate": 1.5068990559186639e-05, "loss": 0.0125, "step": 20070 }, { "epoch": 14.58, "grad_norm": 0.0017795218154788017, "learning_rate": 1.5048817881061892e-05, "loss": 0.0115, "step": 20080 }, { "epoch": 14.58, "grad_norm": 0.008628590032458305, "learning_rate": 1.5028645202937141e-05, "loss": 0.0056, "step": 20090 }, { "epoch": 14.59, "grad_norm": 4.200293064117432, "learning_rate": 1.5008472524812395e-05, "loss": 0.0176, "step": 20100 }, { "epoch": 14.6, "grad_norm": 0.03641504794359207, "learning_rate": 1.4988299846687648e-05, "loss": 0.0084, "step": 20110 }, { "epoch": 14.61, "grad_norm": 0.08268699795007706, "learning_rate": 1.4968127168562898e-05, "loss": 0.0017, "step": 20120 }, { "epoch": 14.61, "grad_norm": 0.0018813032656908035, "learning_rate": 1.4947954490438152e-05, "loss": 0.0097, "step": 20130 }, { "epoch": 14.62, "grad_norm": 0.0015808496391400695, "learning_rate": 1.4927781812313405e-05, "loss": 0.0072, "step": 20140 }, { "epoch": 14.63, "grad_norm": 0.0061777797527611256, "learning_rate": 1.4907609134188654e-05, "loss": 0.0026, "step": 20150 }, { "epoch": 14.64, "grad_norm": 0.04996776953339577, "learning_rate": 1.4887436456063909e-05, "loss": 0.0097, "step": 20160 }, { "epoch": 14.64, "grad_norm": 0.00472896546125412, "learning_rate": 1.4867263777939161e-05, "loss": 0.004, "step": 20170 }, { "epoch": 14.65, "grad_norm": 0.014625828713178635, "learning_rate": 1.4847091099814411e-05, "loss": 0.0003, "step": 20180 }, { "epoch": 14.66, "grad_norm": 0.013716181740164757, "learning_rate": 1.4826918421689665e-05, "loss": 0.0046, "step": 20190 }, { "epoch": 14.66, "grad_norm": 0.0011542306747287512, "learning_rate": 1.4806745743564918e-05, "loss": 0.0108, "step": 20200 }, { "epoch": 14.67, "grad_norm": 0.5403972268104553, "learning_rate": 1.4786573065440167e-05, "loss": 0.0004, "step": 20210 }, { "epoch": 14.68, "grad_norm": 0.026584791019558907, "learning_rate": 1.476640038731542e-05, "loss": 0.0031, "step": 20220 }, { "epoch": 14.69, "grad_norm": 0.11381633579730988, "learning_rate": 1.4746227709190675e-05, "loss": 0.0108, "step": 20230 }, { "epoch": 14.69, "grad_norm": 0.28148502111434937, "learning_rate": 1.4726055031065924e-05, "loss": 0.0082, "step": 20240 }, { "epoch": 14.7, "grad_norm": 0.03273105248808861, "learning_rate": 1.4705882352941177e-05, "loss": 0.0015, "step": 20250 }, { "epoch": 14.71, "grad_norm": 0.10272327065467834, "learning_rate": 1.4685709674816428e-05, "loss": 0.0049, "step": 20260 }, { "epoch": 14.72, "grad_norm": 0.09216368943452835, "learning_rate": 1.466553699669168e-05, "loss": 0.0103, "step": 20270 }, { "epoch": 14.72, "grad_norm": 0.0008601095178164542, "learning_rate": 1.4645364318566933e-05, "loss": 0.0047, "step": 20280 }, { "epoch": 14.73, "grad_norm": 0.0037641292437911034, "learning_rate": 1.4625191640442185e-05, "loss": 0.0013, "step": 20290 }, { "epoch": 14.74, "grad_norm": 0.001481158658862114, "learning_rate": 1.4605018962317437e-05, "loss": 0.0034, "step": 20300 }, { "epoch": 14.74, "grad_norm": 0.045567817986011505, "learning_rate": 1.458484628419269e-05, "loss": 0.007, "step": 20310 }, { "epoch": 14.75, "grad_norm": 0.0007129237637855113, "learning_rate": 1.4564673606067941e-05, "loss": 0.008, "step": 20320 }, { "epoch": 14.76, "grad_norm": 0.000712543202098459, "learning_rate": 1.4544500927943194e-05, "loss": 0.0041, "step": 20330 }, { "epoch": 14.77, "grad_norm": 0.002881669905036688, "learning_rate": 1.4524328249818447e-05, "loss": 0.0083, "step": 20340 }, { "epoch": 14.77, "grad_norm": 0.12856851518154144, "learning_rate": 1.4504155571693698e-05, "loss": 0.007, "step": 20350 }, { "epoch": 14.78, "grad_norm": 0.00111017981544137, "learning_rate": 1.448398289356895e-05, "loss": 0.0042, "step": 20360 }, { "epoch": 14.79, "grad_norm": 0.0010551324812695384, "learning_rate": 1.4463810215444203e-05, "loss": 0.0106, "step": 20370 }, { "epoch": 14.79, "grad_norm": 0.04143055900931358, "learning_rate": 1.4443637537319454e-05, "loss": 0.0033, "step": 20380 }, { "epoch": 14.8, "grad_norm": 0.005768972914665937, "learning_rate": 1.4423464859194707e-05, "loss": 0.0073, "step": 20390 }, { "epoch": 14.81, "grad_norm": 6.56055417493917e-05, "learning_rate": 1.440329218106996e-05, "loss": 0.0173, "step": 20400 }, { "epoch": 14.82, "grad_norm": 0.0011871436145156622, "learning_rate": 1.4383119502945211e-05, "loss": 0.0011, "step": 20410 }, { "epoch": 14.82, "grad_norm": 0.0003176795144099742, "learning_rate": 1.4362946824820464e-05, "loss": 0.0099, "step": 20420 }, { "epoch": 14.83, "grad_norm": 0.1483236402273178, "learning_rate": 1.4342774146695717e-05, "loss": 0.0035, "step": 20430 }, { "epoch": 14.84, "grad_norm": 0.37400639057159424, "learning_rate": 1.4322601468570968e-05, "loss": 0.0093, "step": 20440 }, { "epoch": 14.85, "grad_norm": 5.234686250332743e-05, "learning_rate": 1.430242879044622e-05, "loss": 0.0127, "step": 20450 }, { "epoch": 14.85, "grad_norm": 0.16454139351844788, "learning_rate": 1.4282256112321473e-05, "loss": 0.0096, "step": 20460 }, { "epoch": 14.86, "grad_norm": 30.7055721282959, "learning_rate": 1.4262083434196724e-05, "loss": 0.0079, "step": 20470 }, { "epoch": 14.87, "grad_norm": 0.01432458683848381, "learning_rate": 1.4241910756071977e-05, "loss": 0.0021, "step": 20480 }, { "epoch": 14.87, "grad_norm": 0.0014726252993568778, "learning_rate": 1.422173807794723e-05, "loss": 0.0036, "step": 20490 }, { "epoch": 14.88, "grad_norm": 0.0001415656297467649, "learning_rate": 1.4201565399822481e-05, "loss": 0.0043, "step": 20500 }, { "epoch": 14.89, "grad_norm": 0.9634278416633606, "learning_rate": 1.4181392721697734e-05, "loss": 0.0035, "step": 20510 }, { "epoch": 14.9, "grad_norm": 0.0005977644468657672, "learning_rate": 1.4161220043572985e-05, "loss": 0.0017, "step": 20520 }, { "epoch": 14.9, "grad_norm": 0.0001564495323691517, "learning_rate": 1.4141047365448238e-05, "loss": 0.0043, "step": 20530 }, { "epoch": 14.91, "grad_norm": 9.115212014876306e-05, "learning_rate": 1.412087468732349e-05, "loss": 0.0107, "step": 20540 }, { "epoch": 14.92, "grad_norm": 0.025375254452228546, "learning_rate": 1.4100702009198741e-05, "loss": 0.0135, "step": 20550 }, { "epoch": 14.93, "grad_norm": 0.12782764434814453, "learning_rate": 1.4080529331073994e-05, "loss": 0.0034, "step": 20560 }, { "epoch": 14.93, "grad_norm": 0.06092876195907593, "learning_rate": 1.4060356652949247e-05, "loss": 0.0066, "step": 20570 }, { "epoch": 14.94, "grad_norm": 0.007951307110488415, "learning_rate": 1.4040183974824498e-05, "loss": 0.0057, "step": 20580 }, { "epoch": 14.95, "grad_norm": 0.004196907859295607, "learning_rate": 1.402001129669975e-05, "loss": 0.0055, "step": 20590 }, { "epoch": 14.95, "grad_norm": 24.28219223022461, "learning_rate": 1.3999838618575004e-05, "loss": 0.0047, "step": 20600 }, { "epoch": 14.96, "grad_norm": 0.10869511216878891, "learning_rate": 1.3979665940450255e-05, "loss": 0.0049, "step": 20610 }, { "epoch": 14.97, "grad_norm": 0.003674560459330678, "learning_rate": 1.3959493262325507e-05, "loss": 0.0203, "step": 20620 }, { "epoch": 14.98, "grad_norm": 0.009799223393201828, "learning_rate": 1.393932058420076e-05, "loss": 0.0017, "step": 20630 }, { "epoch": 14.98, "grad_norm": 0.0014988232869654894, "learning_rate": 1.3919147906076011e-05, "loss": 0.0011, "step": 20640 }, { "epoch": 14.99, "grad_norm": 0.01472469512373209, "learning_rate": 1.3898975227951264e-05, "loss": 0.0025, "step": 20650 }, { "epoch": 15.0, "grad_norm": 0.005289836321026087, "learning_rate": 1.3878802549826517e-05, "loss": 0.0032, "step": 20660 }, { "epoch": 15.0, "eval_accuracy": 0.9982756273539956, "eval_f1": 0.9960188580408591, "eval_loss": 0.0035676565021276474, "eval_precision": 0.9994743481917577, "eval_recall": 0.9925871789517645, "eval_roc_auc": 0.9999858661107502, "eval_runtime": 385.486, "eval_samples_per_second": 228.667, "eval_steps_per_second": 14.294, "step": 20662 }, { "epoch": 15.01, "grad_norm": 13.548345565795898, "learning_rate": 1.3858629871701768e-05, "loss": 0.0016, "step": 20670 }, { "epoch": 15.01, "grad_norm": 0.04863952100276947, "learning_rate": 1.383845719357702e-05, "loss": 0.0031, "step": 20680 }, { "epoch": 15.02, "grad_norm": 0.026892608031630516, "learning_rate": 1.3818284515452273e-05, "loss": 0.0022, "step": 20690 }, { "epoch": 15.03, "grad_norm": 3.914630651473999, "learning_rate": 1.3798111837327524e-05, "loss": 0.0048, "step": 20700 }, { "epoch": 15.03, "grad_norm": 0.031707461923360825, "learning_rate": 1.3777939159202777e-05, "loss": 0.004, "step": 20710 }, { "epoch": 15.04, "grad_norm": 0.00016734382370486856, "learning_rate": 1.375776648107803e-05, "loss": 0.0021, "step": 20720 }, { "epoch": 15.05, "grad_norm": 0.5772116780281067, "learning_rate": 1.373759380295328e-05, "loss": 0.0055, "step": 20730 }, { "epoch": 15.06, "grad_norm": 0.10167176276445389, "learning_rate": 1.3717421124828534e-05, "loss": 0.0055, "step": 20740 }, { "epoch": 15.06, "grad_norm": 0.6610731482505798, "learning_rate": 1.3697248446703783e-05, "loss": 0.0114, "step": 20750 }, { "epoch": 15.07, "grad_norm": 0.001937453867867589, "learning_rate": 1.3677075768579036e-05, "loss": 0.0016, "step": 20760 }, { "epoch": 15.08, "grad_norm": 0.00024962503812275827, "learning_rate": 1.365690309045429e-05, "loss": 0.0028, "step": 20770 }, { "epoch": 15.09, "grad_norm": 0.017173290252685547, "learning_rate": 1.363673041232954e-05, "loss": 0.0053, "step": 20780 }, { "epoch": 15.09, "grad_norm": 0.5537311434745789, "learning_rate": 1.3616557734204793e-05, "loss": 0.0061, "step": 20790 }, { "epoch": 15.1, "grad_norm": 0.0027972084935754538, "learning_rate": 1.3596385056080047e-05, "loss": 0.0059, "step": 20800 }, { "epoch": 15.11, "grad_norm": 0.13410428166389465, "learning_rate": 1.3576212377955296e-05, "loss": 0.0033, "step": 20810 }, { "epoch": 15.11, "grad_norm": 33.71815490722656, "learning_rate": 1.355603969983055e-05, "loss": 0.0124, "step": 20820 }, { "epoch": 15.12, "grad_norm": 0.003130529774352908, "learning_rate": 1.3535867021705804e-05, "loss": 0.005, "step": 20830 }, { "epoch": 15.13, "grad_norm": 0.0005914736539125443, "learning_rate": 1.3515694343581053e-05, "loss": 0.0037, "step": 20840 }, { "epoch": 15.14, "grad_norm": 0.43801841139793396, "learning_rate": 1.3495521665456306e-05, "loss": 0.0078, "step": 20850 }, { "epoch": 15.14, "grad_norm": 0.0011702912161126733, "learning_rate": 1.347534898733156e-05, "loss": 0.004, "step": 20860 }, { "epoch": 15.15, "grad_norm": 0.0029354379512369633, "learning_rate": 1.345517630920681e-05, "loss": 0.0, "step": 20870 }, { "epoch": 15.16, "grad_norm": 0.11435387283563614, "learning_rate": 1.3435003631082063e-05, "loss": 0.0022, "step": 20880 }, { "epoch": 15.17, "grad_norm": 43.920196533203125, "learning_rate": 1.3414830952957317e-05, "loss": 0.0172, "step": 20890 }, { "epoch": 15.17, "grad_norm": 32.0137825012207, "learning_rate": 1.3394658274832566e-05, "loss": 0.0257, "step": 20900 }, { "epoch": 15.18, "grad_norm": 0.0019825261551886797, "learning_rate": 1.3374485596707819e-05, "loss": 0.0005, "step": 20910 }, { "epoch": 15.19, "grad_norm": 0.001471333671361208, "learning_rate": 1.3354312918583074e-05, "loss": 0.0105, "step": 20920 }, { "epoch": 15.19, "grad_norm": 0.04608182981610298, "learning_rate": 1.3334140240458323e-05, "loss": 0.008, "step": 20930 }, { "epoch": 15.2, "grad_norm": 0.058197587728500366, "learning_rate": 1.3313967562333576e-05, "loss": 0.0014, "step": 20940 }, { "epoch": 15.21, "grad_norm": 0.00060573237715289, "learning_rate": 1.329379488420883e-05, "loss": 0.0039, "step": 20950 }, { "epoch": 15.22, "grad_norm": 0.0463654026389122, "learning_rate": 1.327362220608408e-05, "loss": 0.0016, "step": 20960 }, { "epoch": 15.22, "grad_norm": 0.0003432673111092299, "learning_rate": 1.3253449527959332e-05, "loss": 0.0022, "step": 20970 }, { "epoch": 15.23, "grad_norm": 0.0033386244904249907, "learning_rate": 1.3233276849834587e-05, "loss": 0.0048, "step": 20980 }, { "epoch": 15.24, "grad_norm": 0.0028187900315970182, "learning_rate": 1.3213104171709836e-05, "loss": 0.0044, "step": 20990 }, { "epoch": 15.25, "grad_norm": 0.06616388261318207, "learning_rate": 1.3192931493585089e-05, "loss": 0.0025, "step": 21000 }, { "epoch": 15.25, "grad_norm": 0.0010185488499701023, "learning_rate": 1.317275881546034e-05, "loss": 0.0012, "step": 21010 }, { "epoch": 15.26, "grad_norm": 0.0004689507477451116, "learning_rate": 1.3152586137335593e-05, "loss": 0.0009, "step": 21020 }, { "epoch": 15.27, "grad_norm": 0.11378660798072815, "learning_rate": 1.3132413459210846e-05, "loss": 0.0055, "step": 21030 }, { "epoch": 15.27, "grad_norm": 0.00018735295452643186, "learning_rate": 1.3112240781086097e-05, "loss": 0.0024, "step": 21040 }, { "epoch": 15.28, "grad_norm": 0.0002505971642676741, "learning_rate": 1.309206810296135e-05, "loss": 0.0048, "step": 21050 }, { "epoch": 15.29, "grad_norm": 0.0006918599247001112, "learning_rate": 1.3071895424836602e-05, "loss": 0.0057, "step": 21060 }, { "epoch": 15.3, "grad_norm": 0.0002549978089518845, "learning_rate": 1.3051722746711853e-05, "loss": 0.0058, "step": 21070 }, { "epoch": 15.3, "grad_norm": 14.719676971435547, "learning_rate": 1.3031550068587106e-05, "loss": 0.0045, "step": 21080 }, { "epoch": 15.31, "grad_norm": 4.1548017179593444e-05, "learning_rate": 1.3011377390462359e-05, "loss": 0.0027, "step": 21090 }, { "epoch": 15.32, "grad_norm": 0.0011699338210746646, "learning_rate": 1.299120471233761e-05, "loss": 0.0093, "step": 21100 }, { "epoch": 15.32, "grad_norm": 0.017013998702168465, "learning_rate": 1.2971032034212863e-05, "loss": 0.0046, "step": 21110 }, { "epoch": 15.33, "grad_norm": 0.00029067235300317407, "learning_rate": 1.2950859356088115e-05, "loss": 0.001, "step": 21120 }, { "epoch": 15.34, "grad_norm": 0.07131381332874298, "learning_rate": 1.2930686677963367e-05, "loss": 0.0158, "step": 21130 }, { "epoch": 15.35, "grad_norm": 0.07471425831317902, "learning_rate": 1.291051399983862e-05, "loss": 0.0028, "step": 21140 }, { "epoch": 15.35, "grad_norm": 26.187938690185547, "learning_rate": 1.2890341321713872e-05, "loss": 0.0072, "step": 21150 }, { "epoch": 15.36, "grad_norm": 0.0010599270462989807, "learning_rate": 1.2870168643589123e-05, "loss": 0.0004, "step": 21160 }, { "epoch": 15.37, "grad_norm": 0.00012274249456822872, "learning_rate": 1.2849995965464376e-05, "loss": 0.0012, "step": 21170 }, { "epoch": 15.38, "grad_norm": 0.020930590108036995, "learning_rate": 1.2829823287339629e-05, "loss": 0.0088, "step": 21180 }, { "epoch": 15.38, "grad_norm": 0.0004236644599586725, "learning_rate": 1.280965060921488e-05, "loss": 0.0085, "step": 21190 }, { "epoch": 15.39, "grad_norm": 0.1861221194267273, "learning_rate": 1.2789477931090133e-05, "loss": 0.0059, "step": 21200 }, { "epoch": 15.4, "grad_norm": 0.001298606744967401, "learning_rate": 1.2769305252965385e-05, "loss": 0.0005, "step": 21210 }, { "epoch": 15.4, "grad_norm": 0.002539963461458683, "learning_rate": 1.2749132574840636e-05, "loss": 0.005, "step": 21220 }, { "epoch": 15.41, "grad_norm": 0.0002390586887486279, "learning_rate": 1.272895989671589e-05, "loss": 0.0085, "step": 21230 }, { "epoch": 15.42, "grad_norm": 0.003349823411554098, "learning_rate": 1.2708787218591139e-05, "loss": 0.0033, "step": 21240 }, { "epoch": 15.43, "grad_norm": 9.157711029052734, "learning_rate": 1.2688614540466393e-05, "loss": 0.0108, "step": 21250 }, { "epoch": 15.43, "grad_norm": 15.022656440734863, "learning_rate": 1.2668441862341646e-05, "loss": 0.0069, "step": 21260 }, { "epoch": 15.44, "grad_norm": 0.014409718103706837, "learning_rate": 1.2648269184216895e-05, "loss": 0.002, "step": 21270 }, { "epoch": 15.45, "grad_norm": 0.09259545803070068, "learning_rate": 1.262809650609215e-05, "loss": 0.0064, "step": 21280 }, { "epoch": 15.46, "grad_norm": 0.013745992444455624, "learning_rate": 1.2607923827967402e-05, "loss": 0.0032, "step": 21290 }, { "epoch": 15.46, "grad_norm": 0.3294007480144501, "learning_rate": 1.2587751149842652e-05, "loss": 0.005, "step": 21300 }, { "epoch": 15.47, "grad_norm": 0.008936860598623753, "learning_rate": 1.2567578471717906e-05, "loss": 0.0129, "step": 21310 }, { "epoch": 15.48, "grad_norm": 0.33121001720428467, "learning_rate": 1.2547405793593159e-05, "loss": 0.0022, "step": 21320 }, { "epoch": 15.48, "grad_norm": 0.004892081022262573, "learning_rate": 1.2527233115468408e-05, "loss": 0.0023, "step": 21330 }, { "epoch": 15.49, "grad_norm": 0.0006819283007644117, "learning_rate": 1.2507060437343663e-05, "loss": 0.003, "step": 21340 }, { "epoch": 15.5, "grad_norm": 0.07798388600349426, "learning_rate": 1.2486887759218914e-05, "loss": 0.0006, "step": 21350 }, { "epoch": 15.51, "grad_norm": 0.0054915305227041245, "learning_rate": 1.2466715081094167e-05, "loss": 0.0068, "step": 21360 }, { "epoch": 15.51, "grad_norm": 3.669898509979248, "learning_rate": 1.244654240296942e-05, "loss": 0.0025, "step": 21370 }, { "epoch": 15.52, "grad_norm": 0.00045484190923161805, "learning_rate": 1.242636972484467e-05, "loss": 0.003, "step": 21380 }, { "epoch": 15.53, "grad_norm": 21.309757232666016, "learning_rate": 1.2406197046719923e-05, "loss": 0.008, "step": 21390 }, { "epoch": 15.54, "grad_norm": 0.001877294504083693, "learning_rate": 1.2386024368595176e-05, "loss": 0.0058, "step": 21400 }, { "epoch": 15.54, "grad_norm": 0.034363534301519394, "learning_rate": 1.2365851690470427e-05, "loss": 0.0159, "step": 21410 }, { "epoch": 15.55, "grad_norm": 0.6154148578643799, "learning_rate": 1.2345679012345678e-05, "loss": 0.0019, "step": 21420 }, { "epoch": 15.56, "grad_norm": 0.0009509364608675241, "learning_rate": 1.2325506334220933e-05, "loss": 0.0031, "step": 21430 }, { "epoch": 15.56, "grad_norm": 0.00043885348713956773, "learning_rate": 1.2305333656096184e-05, "loss": 0.0127, "step": 21440 }, { "epoch": 15.57, "grad_norm": 0.0027128588408231735, "learning_rate": 1.2285160977971435e-05, "loss": 0.0013, "step": 21450 }, { "epoch": 15.58, "grad_norm": 2.2161917686462402, "learning_rate": 1.226498829984669e-05, "loss": 0.0108, "step": 21460 }, { "epoch": 15.59, "grad_norm": 0.13706153631210327, "learning_rate": 1.224481562172194e-05, "loss": 0.0075, "step": 21470 }, { "epoch": 15.59, "grad_norm": 0.009210462681949139, "learning_rate": 1.2224642943597192e-05, "loss": 0.0047, "step": 21480 }, { "epoch": 15.6, "grad_norm": 0.3337669372558594, "learning_rate": 1.2204470265472446e-05, "loss": 0.0065, "step": 21490 }, { "epoch": 15.61, "grad_norm": 0.042242661118507385, "learning_rate": 1.2184297587347697e-05, "loss": 0.0056, "step": 21500 }, { "epoch": 15.62, "grad_norm": 1.992792010307312, "learning_rate": 1.2164124909222948e-05, "loss": 0.0096, "step": 21510 }, { "epoch": 15.62, "grad_norm": 0.028501229360699654, "learning_rate": 1.2143952231098203e-05, "loss": 0.0053, "step": 21520 }, { "epoch": 15.63, "grad_norm": 54.20029830932617, "learning_rate": 1.2123779552973454e-05, "loss": 0.0046, "step": 21530 }, { "epoch": 15.64, "grad_norm": 0.05367405712604523, "learning_rate": 1.2103606874848705e-05, "loss": 0.0044, "step": 21540 }, { "epoch": 15.64, "grad_norm": 0.0021568976808339357, "learning_rate": 1.2083434196723958e-05, "loss": 0.0035, "step": 21550 }, { "epoch": 15.65, "grad_norm": 0.04927918314933777, "learning_rate": 1.206326151859921e-05, "loss": 0.0012, "step": 21560 }, { "epoch": 15.66, "grad_norm": 0.7703726291656494, "learning_rate": 1.2043088840474461e-05, "loss": 0.0036, "step": 21570 }, { "epoch": 15.67, "grad_norm": 0.03503729775547981, "learning_rate": 1.2022916162349714e-05, "loss": 0.0018, "step": 21580 }, { "epoch": 15.67, "grad_norm": 0.01794448494911194, "learning_rate": 1.2002743484224967e-05, "loss": 0.0059, "step": 21590 }, { "epoch": 15.68, "grad_norm": 0.09404520690441132, "learning_rate": 1.1982570806100218e-05, "loss": 0.0041, "step": 21600 }, { "epoch": 15.69, "grad_norm": 0.14694754779338837, "learning_rate": 1.196239812797547e-05, "loss": 0.0031, "step": 21610 }, { "epoch": 15.7, "grad_norm": 0.003776370780542493, "learning_rate": 1.1942225449850724e-05, "loss": 0.007, "step": 21620 }, { "epoch": 15.7, "grad_norm": 0.0026016957126557827, "learning_rate": 1.1922052771725975e-05, "loss": 0.0078, "step": 21630 }, { "epoch": 15.71, "grad_norm": 0.011103923432528973, "learning_rate": 1.1901880093601227e-05, "loss": 0.0102, "step": 21640 }, { "epoch": 15.72, "grad_norm": 0.13180720806121826, "learning_rate": 1.188170741547648e-05, "loss": 0.0035, "step": 21650 }, { "epoch": 15.72, "grad_norm": 16.478391647338867, "learning_rate": 1.1861534737351731e-05, "loss": 0.0131, "step": 21660 }, { "epoch": 15.73, "grad_norm": 0.001228134031407535, "learning_rate": 1.1841362059226984e-05, "loss": 0.0045, "step": 21670 }, { "epoch": 15.74, "grad_norm": 0.0005743647925555706, "learning_rate": 1.1821189381102235e-05, "loss": 0.0022, "step": 21680 }, { "epoch": 15.75, "grad_norm": 0.07435394078493118, "learning_rate": 1.1801016702977488e-05, "loss": 0.004, "step": 21690 }, { "epoch": 15.75, "grad_norm": 0.0004962944076396525, "learning_rate": 1.178084402485274e-05, "loss": 0.006, "step": 21700 }, { "epoch": 15.76, "grad_norm": 0.008445637300610542, "learning_rate": 1.1760671346727992e-05, "loss": 0.0014, "step": 21710 }, { "epoch": 15.77, "grad_norm": 0.08228937536478043, "learning_rate": 1.1740498668603244e-05, "loss": 0.0041, "step": 21720 }, { "epoch": 15.77, "grad_norm": 0.001986768562346697, "learning_rate": 1.1720325990478497e-05, "loss": 0.0006, "step": 21730 }, { "epoch": 15.78, "grad_norm": 0.030242083594202995, "learning_rate": 1.1700153312353748e-05, "loss": 0.0026, "step": 21740 }, { "epoch": 15.79, "grad_norm": 0.000717415998224169, "learning_rate": 1.1679980634229001e-05, "loss": 0.0036, "step": 21750 }, { "epoch": 15.8, "grad_norm": 0.001572469249367714, "learning_rate": 1.1659807956104254e-05, "loss": 0.0053, "step": 21760 }, { "epoch": 15.8, "grad_norm": 0.025059890002012253, "learning_rate": 1.1639635277979505e-05, "loss": 0.0017, "step": 21770 }, { "epoch": 15.81, "grad_norm": 0.0006473218672908843, "learning_rate": 1.1619462599854758e-05, "loss": 0.0061, "step": 21780 }, { "epoch": 15.82, "grad_norm": 0.12124957889318466, "learning_rate": 1.159928992173001e-05, "loss": 0.0038, "step": 21790 }, { "epoch": 15.83, "grad_norm": 0.2481914907693863, "learning_rate": 1.1579117243605262e-05, "loss": 0.0028, "step": 21800 }, { "epoch": 15.83, "grad_norm": 0.0032148726750165224, "learning_rate": 1.1558944565480513e-05, "loss": 0.0047, "step": 21810 }, { "epoch": 15.84, "grad_norm": 0.00942063145339489, "learning_rate": 1.1538771887355765e-05, "loss": 0.001, "step": 21820 }, { "epoch": 15.85, "grad_norm": 0.04141150042414665, "learning_rate": 1.1518599209231018e-05, "loss": 0.0036, "step": 21830 }, { "epoch": 15.85, "grad_norm": 0.03869690001010895, "learning_rate": 1.149842653110627e-05, "loss": 0.0027, "step": 21840 }, { "epoch": 15.86, "grad_norm": 0.0011927615851163864, "learning_rate": 1.1478253852981522e-05, "loss": 0.0017, "step": 21850 }, { "epoch": 15.87, "grad_norm": 0.008258639834821224, "learning_rate": 1.1458081174856775e-05, "loss": 0.0005, "step": 21860 }, { "epoch": 15.88, "grad_norm": 0.006110721733421087, "learning_rate": 1.1437908496732026e-05, "loss": 0.0054, "step": 21870 }, { "epoch": 15.88, "grad_norm": 0.15199817717075348, "learning_rate": 1.1417735818607279e-05, "loss": 0.0043, "step": 21880 }, { "epoch": 15.89, "grad_norm": 0.0010181881953030825, "learning_rate": 1.1397563140482531e-05, "loss": 0.0037, "step": 21890 }, { "epoch": 15.9, "grad_norm": 0.0009028404019773006, "learning_rate": 1.1377390462357783e-05, "loss": 0.0042, "step": 21900 }, { "epoch": 15.91, "grad_norm": 14.625401496887207, "learning_rate": 1.1357217784233035e-05, "loss": 0.016, "step": 21910 }, { "epoch": 15.91, "grad_norm": 0.0001988597068702802, "learning_rate": 1.1337045106108288e-05, "loss": 0.0017, "step": 21920 }, { "epoch": 15.92, "grad_norm": 0.000211259801289998, "learning_rate": 1.1316872427983539e-05, "loss": 0.0051, "step": 21930 }, { "epoch": 15.93, "grad_norm": 3.789830952882767e-05, "learning_rate": 1.1296699749858792e-05, "loss": 0.001, "step": 21940 }, { "epoch": 15.93, "grad_norm": 3.52205170202069e-05, "learning_rate": 1.1276527071734045e-05, "loss": 0.0026, "step": 21950 }, { "epoch": 15.94, "grad_norm": 0.0002518637338653207, "learning_rate": 1.1256354393609296e-05, "loss": 0.004, "step": 21960 }, { "epoch": 15.95, "grad_norm": 0.0853363573551178, "learning_rate": 1.1236181715484549e-05, "loss": 0.0112, "step": 21970 }, { "epoch": 15.96, "grad_norm": 0.0005988592747598886, "learning_rate": 1.1216009037359801e-05, "loss": 0.0028, "step": 21980 }, { "epoch": 15.96, "grad_norm": 0.07639417797327042, "learning_rate": 1.1195836359235052e-05, "loss": 0.0199, "step": 21990 }, { "epoch": 15.97, "grad_norm": 0.0889679491519928, "learning_rate": 1.1175663681110305e-05, "loss": 0.0067, "step": 22000 }, { "epoch": 15.98, "grad_norm": 0.0020072232000529766, "learning_rate": 1.1155491002985558e-05, "loss": 0.0003, "step": 22010 }, { "epoch": 15.99, "grad_norm": 0.05244208872318268, "learning_rate": 1.1135318324860809e-05, "loss": 0.0046, "step": 22020 }, { "epoch": 15.99, "grad_norm": 0.0003801613347604871, "learning_rate": 1.1115145646736062e-05, "loss": 0.0049, "step": 22030 }, { "epoch": 16.0, "grad_norm": 0.0038860926870256662, "learning_rate": 1.1094972968611313e-05, "loss": 0.0154, "step": 22040 }, { "epoch": 16.0, "eval_accuracy": 0.9983436946952852, "eval_f1": 0.9961762086847206, "eval_loss": 0.003294909605756402, "eval_precision": 0.9995795227583307, "eval_recall": 0.9927959908122781, "eval_roc_auc": 0.9999867430709279, "eval_runtime": 387.006, "eval_samples_per_second": 227.769, "eval_steps_per_second": 14.238, "step": 22040 }, { "epoch": 16.01, "grad_norm": 0.0014847785932943225, "learning_rate": 1.1074800290486566e-05, "loss": 0.0051, "step": 22050 }, { "epoch": 16.01, "grad_norm": 2.5662457942962646, "learning_rate": 1.1054627612361818e-05, "loss": 0.0037, "step": 22060 }, { "epoch": 16.02, "grad_norm": 0.0007208075257949531, "learning_rate": 1.103445493423707e-05, "loss": 0.004, "step": 22070 }, { "epoch": 16.03, "grad_norm": 0.0007924524252302945, "learning_rate": 1.1014282256112322e-05, "loss": 0.0034, "step": 22080 }, { "epoch": 16.04, "grad_norm": 0.15768921375274658, "learning_rate": 1.0994109577987573e-05, "loss": 0.0037, "step": 22090 }, { "epoch": 16.04, "grad_norm": 0.0002586398331914097, "learning_rate": 1.0973936899862826e-05, "loss": 0.0021, "step": 22100 }, { "epoch": 16.05, "grad_norm": 0.005661542993038893, "learning_rate": 1.0953764221738079e-05, "loss": 0.0031, "step": 22110 }, { "epoch": 16.06, "grad_norm": 0.0002936258970294148, "learning_rate": 1.093359154361333e-05, "loss": 0.0038, "step": 22120 }, { "epoch": 16.07, "grad_norm": 0.001346628530882299, "learning_rate": 1.0913418865488583e-05, "loss": 0.0014, "step": 22130 }, { "epoch": 16.07, "grad_norm": 0.08555719256401062, "learning_rate": 1.0893246187363835e-05, "loss": 0.0173, "step": 22140 }, { "epoch": 16.08, "grad_norm": 0.00019549914577510208, "learning_rate": 1.0873073509239087e-05, "loss": 0.0, "step": 22150 }, { "epoch": 16.09, "grad_norm": 0.0027101049199700356, "learning_rate": 1.085290083111434e-05, "loss": 0.0052, "step": 22160 }, { "epoch": 16.09, "grad_norm": 0.00028255765209905803, "learning_rate": 1.083272815298959e-05, "loss": 0.0046, "step": 22170 }, { "epoch": 16.1, "grad_norm": 0.0008104601292870939, "learning_rate": 1.0812555474864843e-05, "loss": 0.001, "step": 22180 }, { "epoch": 16.11, "grad_norm": 0.09307300299406052, "learning_rate": 1.0792382796740096e-05, "loss": 0.0097, "step": 22190 }, { "epoch": 16.12, "grad_norm": 0.00042667603702284396, "learning_rate": 1.0772210118615347e-05, "loss": 0.0029, "step": 22200 }, { "epoch": 16.12, "grad_norm": 0.03504578396677971, "learning_rate": 1.07520374404906e-05, "loss": 0.0032, "step": 22210 }, { "epoch": 16.13, "grad_norm": 0.007888894528150558, "learning_rate": 1.0731864762365853e-05, "loss": 0.0021, "step": 22220 }, { "epoch": 16.14, "grad_norm": 0.09013179689645767, "learning_rate": 1.0711692084241104e-05, "loss": 0.0031, "step": 22230 }, { "epoch": 16.15, "grad_norm": 0.00033059256384149194, "learning_rate": 1.0691519406116356e-05, "loss": 0.0042, "step": 22240 }, { "epoch": 16.15, "grad_norm": 0.0006427318439818919, "learning_rate": 1.067134672799161e-05, "loss": 0.0105, "step": 22250 }, { "epoch": 16.16, "grad_norm": 0.0007930251304060221, "learning_rate": 1.065117404986686e-05, "loss": 0.0002, "step": 22260 }, { "epoch": 16.17, "grad_norm": 0.08082820475101471, "learning_rate": 1.0631001371742113e-05, "loss": 0.0042, "step": 22270 }, { "epoch": 16.17, "grad_norm": 0.10491488128900528, "learning_rate": 1.0610828693617366e-05, "loss": 0.0029, "step": 22280 }, { "epoch": 16.18, "grad_norm": 0.0001469424896640703, "learning_rate": 1.0590656015492617e-05, "loss": 0.0028, "step": 22290 }, { "epoch": 16.19, "grad_norm": 0.041843656450510025, "learning_rate": 1.057048333736787e-05, "loss": 0.0042, "step": 22300 }, { "epoch": 16.2, "grad_norm": 0.00035109708551317453, "learning_rate": 1.0550310659243122e-05, "loss": 0.0024, "step": 22310 }, { "epoch": 16.2, "grad_norm": 0.0014608385972678661, "learning_rate": 1.0530137981118374e-05, "loss": 0.0017, "step": 22320 }, { "epoch": 16.21, "grad_norm": 0.00027612573467195034, "learning_rate": 1.0509965302993625e-05, "loss": 0.0008, "step": 22330 }, { "epoch": 16.22, "grad_norm": 2.363631010055542, "learning_rate": 1.0489792624868879e-05, "loss": 0.0051, "step": 22340 }, { "epoch": 16.23, "grad_norm": 0.0002950582420453429, "learning_rate": 1.046961994674413e-05, "loss": 0.0023, "step": 22350 }, { "epoch": 16.23, "grad_norm": 0.0681629404425621, "learning_rate": 1.0449447268619381e-05, "loss": 0.0004, "step": 22360 }, { "epoch": 16.24, "grad_norm": 0.00012084317131666467, "learning_rate": 1.0429274590494636e-05, "loss": 0.0, "step": 22370 }, { "epoch": 16.25, "grad_norm": 0.00011871389142470434, "learning_rate": 1.0409101912369887e-05, "loss": 0.0026, "step": 22380 }, { "epoch": 16.25, "grad_norm": 0.0011634239926934242, "learning_rate": 1.0388929234245138e-05, "loss": 0.0009, "step": 22390 }, { "epoch": 16.26, "grad_norm": 0.20264987647533417, "learning_rate": 1.0368756556120392e-05, "loss": 0.0036, "step": 22400 }, { "epoch": 16.27, "grad_norm": 0.0032964616548269987, "learning_rate": 1.0348583877995643e-05, "loss": 0.0023, "step": 22410 }, { "epoch": 16.28, "grad_norm": 0.07189714908599854, "learning_rate": 1.0328411199870894e-05, "loss": 0.0023, "step": 22420 }, { "epoch": 16.28, "grad_norm": 2.3079440593719482, "learning_rate": 1.0308238521746147e-05, "loss": 0.0012, "step": 22430 }, { "epoch": 16.29, "grad_norm": 0.007369950879365206, "learning_rate": 1.02880658436214e-05, "loss": 0.0009, "step": 22440 }, { "epoch": 16.3, "grad_norm": 0.00015242438530549407, "learning_rate": 1.0267893165496651e-05, "loss": 0.0009, "step": 22450 }, { "epoch": 16.3, "grad_norm": 0.08135165274143219, "learning_rate": 1.0247720487371904e-05, "loss": 0.0084, "step": 22460 }, { "epoch": 16.31, "grad_norm": 0.04192609712481499, "learning_rate": 1.0227547809247157e-05, "loss": 0.0042, "step": 22470 }, { "epoch": 16.32, "grad_norm": 0.0002498167159501463, "learning_rate": 1.0207375131122408e-05, "loss": 0.0035, "step": 22480 }, { "epoch": 16.33, "grad_norm": 0.005605372134596109, "learning_rate": 1.018720245299766e-05, "loss": 0.0018, "step": 22490 }, { "epoch": 16.33, "grad_norm": 0.04002232104539871, "learning_rate": 1.0167029774872913e-05, "loss": 0.0126, "step": 22500 }, { "epoch": 16.34, "grad_norm": 0.0337141789495945, "learning_rate": 1.0146857096748164e-05, "loss": 0.0093, "step": 22510 }, { "epoch": 16.35, "grad_norm": 0.005516758654266596, "learning_rate": 1.0126684418623417e-05, "loss": 0.004, "step": 22520 }, { "epoch": 16.36, "grad_norm": 0.03148741275072098, "learning_rate": 1.010651174049867e-05, "loss": 0.0064, "step": 22530 }, { "epoch": 16.36, "grad_norm": 0.02057914063334465, "learning_rate": 1.0086339062373921e-05, "loss": 0.0183, "step": 22540 }, { "epoch": 16.37, "grad_norm": 0.13048245012760162, "learning_rate": 1.0066166384249174e-05, "loss": 0.0046, "step": 22550 }, { "epoch": 16.38, "grad_norm": 0.0004054498567711562, "learning_rate": 1.0045993706124425e-05, "loss": 0.0001, "step": 22560 }, { "epoch": 16.38, "grad_norm": 0.0032589773181825876, "learning_rate": 1.0025821027999678e-05, "loss": 0.0005, "step": 22570 }, { "epoch": 16.39, "grad_norm": 0.0005514941876754165, "learning_rate": 1.000564834987493e-05, "loss": 0.0006, "step": 22580 }, { "epoch": 16.4, "grad_norm": 0.012021565809845924, "learning_rate": 9.985475671750181e-06, "loss": 0.0092, "step": 22590 }, { "epoch": 16.41, "grad_norm": 0.0435185469686985, "learning_rate": 9.965302993625434e-06, "loss": 0.0035, "step": 22600 }, { "epoch": 16.41, "grad_norm": 0.0010871135164052248, "learning_rate": 9.945130315500687e-06, "loss": 0.0096, "step": 22610 }, { "epoch": 16.42, "grad_norm": 0.010872997343540192, "learning_rate": 9.924957637375938e-06, "loss": 0.0052, "step": 22620 }, { "epoch": 16.43, "grad_norm": 0.0006780479452572763, "learning_rate": 9.90478495925119e-06, "loss": 0.001, "step": 22630 }, { "epoch": 16.44, "grad_norm": 0.03470781072974205, "learning_rate": 9.884612281126444e-06, "loss": 0.0095, "step": 22640 }, { "epoch": 16.44, "grad_norm": 0.010145553387701511, "learning_rate": 9.864439603001695e-06, "loss": 0.01, "step": 22650 }, { "epoch": 16.45, "grad_norm": 0.0024723324459046125, "learning_rate": 9.844266924876946e-06, "loss": 0.0012, "step": 22660 }, { "epoch": 16.46, "grad_norm": 0.19039224088191986, "learning_rate": 9.8240942467522e-06, "loss": 0.0057, "step": 22670 }, { "epoch": 16.46, "grad_norm": 0.03419295325875282, "learning_rate": 9.803921568627451e-06, "loss": 0.0, "step": 22680 }, { "epoch": 16.47, "grad_norm": 0.000138914241688326, "learning_rate": 9.783748890502702e-06, "loss": 0.0087, "step": 22690 }, { "epoch": 16.48, "grad_norm": 0.057385221123695374, "learning_rate": 9.763576212377957e-06, "loss": 0.0065, "step": 22700 }, { "epoch": 16.49, "grad_norm": 0.08868400007486343, "learning_rate": 9.743403534253208e-06, "loss": 0.0049, "step": 22710 }, { "epoch": 16.49, "grad_norm": 0.0007755668484605849, "learning_rate": 9.723230856128459e-06, "loss": 0.0025, "step": 22720 }, { "epoch": 16.5, "grad_norm": 0.004135680850595236, "learning_rate": 9.703058178003713e-06, "loss": 0.0026, "step": 22730 }, { "epoch": 16.51, "grad_norm": 0.0001737570419209078, "learning_rate": 9.682885499878964e-06, "loss": 0.0047, "step": 22740 }, { "epoch": 16.52, "grad_norm": 0.207319438457489, "learning_rate": 9.662712821754216e-06, "loss": 0.0057, "step": 22750 }, { "epoch": 16.52, "grad_norm": 0.5539862513542175, "learning_rate": 9.64254014362947e-06, "loss": 0.0059, "step": 22760 }, { "epoch": 16.53, "grad_norm": 0.205776184797287, "learning_rate": 9.622367465504721e-06, "loss": 0.0066, "step": 22770 }, { "epoch": 16.54, "grad_norm": 0.16564740240573883, "learning_rate": 9.602194787379972e-06, "loss": 0.0014, "step": 22780 }, { "epoch": 16.54, "grad_norm": 0.0013297771802172065, "learning_rate": 9.582022109255225e-06, "loss": 0.003, "step": 22790 }, { "epoch": 16.55, "grad_norm": 2.7137090000906028e-05, "learning_rate": 9.561849431130478e-06, "loss": 0.0025, "step": 22800 }, { "epoch": 16.56, "grad_norm": 0.00012067361240042374, "learning_rate": 9.541676753005729e-06, "loss": 0.0004, "step": 22810 }, { "epoch": 16.57, "grad_norm": 0.0005142318550497293, "learning_rate": 9.521504074880982e-06, "loss": 0.0014, "step": 22820 }, { "epoch": 16.57, "grad_norm": 8.987193723442033e-05, "learning_rate": 9.501331396756234e-06, "loss": 0.0074, "step": 22830 }, { "epoch": 16.58, "grad_norm": 0.04224991053342819, "learning_rate": 9.481158718631485e-06, "loss": 0.0041, "step": 22840 }, { "epoch": 16.59, "grad_norm": 0.0012686103582382202, "learning_rate": 9.460986040506738e-06, "loss": 0.0082, "step": 22850 }, { "epoch": 16.6, "grad_norm": 1.1625761985778809, "learning_rate": 9.440813362381991e-06, "loss": 0.0056, "step": 22860 }, { "epoch": 16.6, "grad_norm": 13.984100341796875, "learning_rate": 9.420640684257242e-06, "loss": 0.001, "step": 22870 }, { "epoch": 16.61, "grad_norm": 0.0042472160421311855, "learning_rate": 9.400468006132495e-06, "loss": 0.0008, "step": 22880 }, { "epoch": 16.62, "grad_norm": 0.013330524787306786, "learning_rate": 9.380295328007748e-06, "loss": 0.0023, "step": 22890 }, { "epoch": 16.62, "grad_norm": 0.002952015260234475, "learning_rate": 9.360122649882999e-06, "loss": 0.0026, "step": 22900 }, { "epoch": 16.63, "grad_norm": 6.471742381108925e-05, "learning_rate": 9.339949971758251e-06, "loss": 0.0049, "step": 22910 }, { "epoch": 16.64, "grad_norm": 0.000877292244695127, "learning_rate": 9.319777293633503e-06, "loss": 0.0028, "step": 22920 }, { "epoch": 16.65, "grad_norm": 0.001535005052573979, "learning_rate": 9.299604615508755e-06, "loss": 0.0016, "step": 22930 }, { "epoch": 16.65, "grad_norm": 54.134674072265625, "learning_rate": 9.279431937384008e-06, "loss": 0.0048, "step": 22940 }, { "epoch": 16.66, "grad_norm": 0.09877274930477142, "learning_rate": 9.259259259259259e-06, "loss": 0.0037, "step": 22950 }, { "epoch": 16.67, "grad_norm": 0.03050428256392479, "learning_rate": 9.239086581134512e-06, "loss": 0.0016, "step": 22960 }, { "epoch": 16.68, "grad_norm": 9.728727309266105e-05, "learning_rate": 9.218913903009765e-06, "loss": 0.0028, "step": 22970 }, { "epoch": 16.68, "grad_norm": 0.10689128190279007, "learning_rate": 9.198741224885016e-06, "loss": 0.0013, "step": 22980 }, { "epoch": 16.69, "grad_norm": 0.0006969768437556922, "learning_rate": 9.178568546760269e-06, "loss": 0.0045, "step": 22990 }, { "epoch": 16.7, "grad_norm": 0.0004667758184950799, "learning_rate": 9.158395868635521e-06, "loss": 0.002, "step": 23000 }, { "epoch": 16.7, "grad_norm": 0.3971853256225586, "learning_rate": 9.138223190510772e-06, "loss": 0.003, "step": 23010 }, { "epoch": 16.71, "grad_norm": 6.46812331979163e-05, "learning_rate": 9.118050512386025e-06, "loss": 0.0051, "step": 23020 }, { "epoch": 16.72, "grad_norm": 0.02988688088953495, "learning_rate": 9.097877834261278e-06, "loss": 0.0013, "step": 23030 }, { "epoch": 16.73, "grad_norm": 0.0004272510705050081, "learning_rate": 9.077705156136529e-06, "loss": 0.0292, "step": 23040 }, { "epoch": 16.73, "grad_norm": 0.00036017660750076175, "learning_rate": 9.05753247801178e-06, "loss": 0.0035, "step": 23050 }, { "epoch": 16.74, "grad_norm": 0.00015025348693598062, "learning_rate": 9.037359799887035e-06, "loss": 0.0121, "step": 23060 }, { "epoch": 16.75, "grad_norm": 0.054434388875961304, "learning_rate": 9.017187121762286e-06, "loss": 0.002, "step": 23070 }, { "epoch": 16.75, "grad_norm": 0.000985904480330646, "learning_rate": 8.997014443637537e-06, "loss": 0.0038, "step": 23080 }, { "epoch": 16.76, "grad_norm": 0.12436607480049133, "learning_rate": 8.976841765512791e-06, "loss": 0.0044, "step": 23090 }, { "epoch": 16.77, "grad_norm": 0.03900473937392235, "learning_rate": 8.956669087388042e-06, "loss": 0.0127, "step": 23100 }, { "epoch": 16.78, "grad_norm": 0.0008112489013001323, "learning_rate": 8.936496409263293e-06, "loss": 0.0, "step": 23110 }, { "epoch": 16.78, "grad_norm": 0.034635029733181, "learning_rate": 8.916323731138548e-06, "loss": 0.0046, "step": 23120 }, { "epoch": 16.79, "grad_norm": 0.0005446127033792436, "learning_rate": 8.896151053013799e-06, "loss": 0.0039, "step": 23130 }, { "epoch": 16.8, "grad_norm": 0.02086440846323967, "learning_rate": 8.87597837488905e-06, "loss": 0.0056, "step": 23140 }, { "epoch": 16.81, "grad_norm": 0.2668115198612213, "learning_rate": 8.855805696764303e-06, "loss": 0.0094, "step": 23150 }, { "epoch": 16.81, "grad_norm": 0.0030102794989943504, "learning_rate": 8.835633018639555e-06, "loss": 0.003, "step": 23160 }, { "epoch": 16.82, "grad_norm": 0.08962462097406387, "learning_rate": 8.815460340514807e-06, "loss": 0.0043, "step": 23170 }, { "epoch": 16.83, "grad_norm": 0.0004162557306699455, "learning_rate": 8.79528766239006e-06, "loss": 0.0011, "step": 23180 }, { "epoch": 16.83, "grad_norm": 0.08701346069574356, "learning_rate": 8.775114984265312e-06, "loss": 0.0063, "step": 23190 }, { "epoch": 16.84, "grad_norm": 0.0005376107874326408, "learning_rate": 8.754942306140563e-06, "loss": 0.0008, "step": 23200 }, { "epoch": 16.85, "grad_norm": 0.002278102096170187, "learning_rate": 8.734769628015816e-06, "loss": 0.0049, "step": 23210 }, { "epoch": 16.86, "grad_norm": 0.0011193858226761222, "learning_rate": 8.714596949891069e-06, "loss": 0.0105, "step": 23220 }, { "epoch": 16.86, "grad_norm": 0.03300873935222626, "learning_rate": 8.69442427176632e-06, "loss": 0.0038, "step": 23230 }, { "epoch": 16.87, "grad_norm": 0.08699040114879608, "learning_rate": 8.674251593641573e-06, "loss": 0.0095, "step": 23240 }, { "epoch": 16.88, "grad_norm": 0.13259164988994598, "learning_rate": 8.654078915516825e-06, "loss": 0.0089, "step": 23250 }, { "epoch": 16.89, "grad_norm": 0.011855214834213257, "learning_rate": 8.633906237392076e-06, "loss": 0.0059, "step": 23260 }, { "epoch": 16.89, "grad_norm": 0.001402218360453844, "learning_rate": 8.61373355926733e-06, "loss": 0.0048, "step": 23270 }, { "epoch": 16.9, "grad_norm": 0.0021518440917134285, "learning_rate": 8.59356088114258e-06, "loss": 0.0064, "step": 23280 }, { "epoch": 16.91, "grad_norm": 0.03953978046774864, "learning_rate": 8.573388203017833e-06, "loss": 0.0266, "step": 23290 }, { "epoch": 16.91, "grad_norm": 0.002244510455057025, "learning_rate": 8.553215524893086e-06, "loss": 0.0006, "step": 23300 }, { "epoch": 16.92, "grad_norm": 0.03700876981019974, "learning_rate": 8.533042846768337e-06, "loss": 0.0026, "step": 23310 }, { "epoch": 16.93, "grad_norm": 0.005215387791395187, "learning_rate": 8.51287016864359e-06, "loss": 0.0039, "step": 23320 }, { "epoch": 16.94, "grad_norm": 0.0015378224197775126, "learning_rate": 8.492697490518842e-06, "loss": 0.0042, "step": 23330 }, { "epoch": 16.94, "grad_norm": 0.05736195296049118, "learning_rate": 8.472524812394094e-06, "loss": 0.0039, "step": 23340 }, { "epoch": 16.95, "grad_norm": 0.03182898834347725, "learning_rate": 8.452352134269346e-06, "loss": 0.0086, "step": 23350 }, { "epoch": 16.96, "grad_norm": 7.0105366706848145, "learning_rate": 8.432179456144599e-06, "loss": 0.006, "step": 23360 }, { "epoch": 16.97, "grad_norm": 0.006832567043602467, "learning_rate": 8.41200677801985e-06, "loss": 0.0043, "step": 23370 }, { "epoch": 16.97, "grad_norm": 0.06288957595825195, "learning_rate": 8.391834099895103e-06, "loss": 0.0054, "step": 23380 }, { "epoch": 16.98, "grad_norm": 0.0075091165490448475, "learning_rate": 8.371661421770356e-06, "loss": 0.0097, "step": 23390 }, { "epoch": 16.99, "grad_norm": 0.0006531989201903343, "learning_rate": 8.351488743645607e-06, "loss": 0.0009, "step": 23400 }, { "epoch": 16.99, "grad_norm": 0.0026475286576896906, "learning_rate": 8.331316065520858e-06, "loss": 0.0041, "step": 23410 }, { "epoch": 17.0, "eval_accuracy": 0.9983663838090484, "eval_f1": 0.9962274037201991, "eval_loss": 0.0031724609434604645, "eval_precision": 0.9999474071736615, "eval_recall": 0.9925349759866361, "eval_roc_auc": 0.9999873945486788, "eval_runtime": 388.297, "eval_samples_per_second": 227.012, "eval_steps_per_second": 14.19, "step": 23417 }, { "epoch": 17.0, "grad_norm": 0.11131101846694946, "learning_rate": 8.31114338739611e-06, "loss": 0.0114, "step": 23420 }, { "epoch": 17.01, "grad_norm": 0.09979415684938431, "learning_rate": 8.290970709271363e-06, "loss": 0.0058, "step": 23430 }, { "epoch": 17.02, "grad_norm": 0.05326874554157257, "learning_rate": 8.270798031146614e-06, "loss": 0.0027, "step": 23440 }, { "epoch": 17.02, "grad_norm": 0.004584606271237135, "learning_rate": 8.250625353021867e-06, "loss": 0.0026, "step": 23450 }, { "epoch": 17.03, "grad_norm": 0.08099240064620972, "learning_rate": 8.23045267489712e-06, "loss": 0.0062, "step": 23460 }, { "epoch": 17.04, "grad_norm": 0.01665760576725006, "learning_rate": 8.210279996772371e-06, "loss": 0.0017, "step": 23470 }, { "epoch": 17.05, "grad_norm": 0.0030292777810245752, "learning_rate": 8.190107318647624e-06, "loss": 0.0, "step": 23480 }, { "epoch": 17.05, "grad_norm": 0.004888925235718489, "learning_rate": 8.169934640522877e-06, "loss": 0.0049, "step": 23490 }, { "epoch": 17.06, "grad_norm": 0.01544449757784605, "learning_rate": 8.149761962398128e-06, "loss": 0.0029, "step": 23500 }, { "epoch": 17.07, "grad_norm": 0.3572964370250702, "learning_rate": 8.12958928427338e-06, "loss": 0.013, "step": 23510 }, { "epoch": 17.07, "grad_norm": 0.0019218194065615535, "learning_rate": 8.109416606148633e-06, "loss": 0.0005, "step": 23520 }, { "epoch": 17.08, "grad_norm": 33.84624099731445, "learning_rate": 8.089243928023884e-06, "loss": 0.0251, "step": 23530 }, { "epoch": 17.09, "grad_norm": 0.0994623526930809, "learning_rate": 8.069071249899137e-06, "loss": 0.0042, "step": 23540 }, { "epoch": 17.1, "grad_norm": 0.03560971841216087, "learning_rate": 8.04889857177439e-06, "loss": 0.0111, "step": 23550 }, { "epoch": 17.1, "grad_norm": 0.0007038107723928988, "learning_rate": 8.028725893649641e-06, "loss": 0.0067, "step": 23560 }, { "epoch": 17.11, "grad_norm": 0.04057719558477402, "learning_rate": 8.008553215524894e-06, "loss": 0.0024, "step": 23570 }, { "epoch": 17.12, "grad_norm": 0.057476069778203964, "learning_rate": 7.988380537400146e-06, "loss": 0.0032, "step": 23580 }, { "epoch": 17.13, "grad_norm": 0.0002899345417972654, "learning_rate": 7.968207859275398e-06, "loss": 0.0029, "step": 23590 }, { "epoch": 17.13, "grad_norm": 0.0003263648832216859, "learning_rate": 7.94803518115065e-06, "loss": 0.0031, "step": 23600 }, { "epoch": 17.14, "grad_norm": 0.0024582231417298317, "learning_rate": 7.927862503025903e-06, "loss": 0.0061, "step": 23610 }, { "epoch": 17.15, "grad_norm": 0.0026196760591119528, "learning_rate": 7.907689824901154e-06, "loss": 0.0032, "step": 23620 }, { "epoch": 17.15, "grad_norm": 0.0027710944414138794, "learning_rate": 7.887517146776407e-06, "loss": 0.002, "step": 23630 }, { "epoch": 17.16, "grad_norm": 0.0001118101499741897, "learning_rate": 7.86734446865166e-06, "loss": 0.0068, "step": 23640 }, { "epoch": 17.17, "grad_norm": 0.9812076091766357, "learning_rate": 7.84717179052691e-06, "loss": 0.0044, "step": 23650 }, { "epoch": 17.18, "grad_norm": 0.0007418180466629565, "learning_rate": 7.826999112402164e-06, "loss": 0.0042, "step": 23660 }, { "epoch": 17.18, "grad_norm": 0.04977266862988472, "learning_rate": 7.806826434277415e-06, "loss": 0.0023, "step": 23670 }, { "epoch": 17.19, "grad_norm": 0.00283865537494421, "learning_rate": 7.786653756152667e-06, "loss": 0.0011, "step": 23680 }, { "epoch": 17.2, "grad_norm": 0.16779397428035736, "learning_rate": 7.766481078027918e-06, "loss": 0.0035, "step": 23690 }, { "epoch": 17.21, "grad_norm": 0.001473784213885665, "learning_rate": 7.746308399903171e-06, "loss": 0.0036, "step": 23700 }, { "epoch": 17.21, "grad_norm": 0.00017607476911507547, "learning_rate": 7.726135721778424e-06, "loss": 0.002, "step": 23710 }, { "epoch": 17.22, "grad_norm": 0.027364423498511314, "learning_rate": 7.705963043653675e-06, "loss": 0.0092, "step": 23720 }, { "epoch": 17.23, "grad_norm": 0.00015377481759060174, "learning_rate": 7.685790365528928e-06, "loss": 0.0023, "step": 23730 }, { "epoch": 17.23, "grad_norm": 10.986251831054688, "learning_rate": 7.66561768740418e-06, "loss": 0.0128, "step": 23740 }, { "epoch": 17.24, "grad_norm": 0.13001392781734467, "learning_rate": 7.645445009279432e-06, "loss": 0.0042, "step": 23750 }, { "epoch": 17.25, "grad_norm": 0.033883240073919296, "learning_rate": 7.6252723311546845e-06, "loss": 0.002, "step": 23760 }, { "epoch": 17.26, "grad_norm": 0.06336942315101624, "learning_rate": 7.6050996530299356e-06, "loss": 0.0008, "step": 23770 }, { "epoch": 17.26, "grad_norm": 0.0002739182091318071, "learning_rate": 7.584926974905189e-06, "loss": 0.0025, "step": 23780 }, { "epoch": 17.27, "grad_norm": 0.003048642072826624, "learning_rate": 7.564754296780441e-06, "loss": 0.0021, "step": 23790 }, { "epoch": 17.28, "grad_norm": 0.002867691917344928, "learning_rate": 7.544581618655692e-06, "loss": 0.0004, "step": 23800 }, { "epoch": 17.28, "grad_norm": 0.1003432348370552, "learning_rate": 7.524408940530946e-06, "loss": 0.0049, "step": 23810 }, { "epoch": 17.29, "grad_norm": 0.0033953345846384764, "learning_rate": 7.504236262406198e-06, "loss": 0.0004, "step": 23820 }, { "epoch": 17.3, "grad_norm": 0.012755511328577995, "learning_rate": 7.484063584281449e-06, "loss": 0.0054, "step": 23830 }, { "epoch": 17.31, "grad_norm": 0.000518796790856868, "learning_rate": 7.4638909061567024e-06, "loss": 0.0033, "step": 23840 }, { "epoch": 17.31, "grad_norm": 0.11989326775074005, "learning_rate": 7.443718228031954e-06, "loss": 0.003, "step": 23850 }, { "epoch": 17.32, "grad_norm": 0.00038776840665377676, "learning_rate": 7.4235455499072054e-06, "loss": 0.0039, "step": 23860 }, { "epoch": 17.33, "grad_norm": 0.10173983126878738, "learning_rate": 7.403372871782459e-06, "loss": 0.0053, "step": 23870 }, { "epoch": 17.34, "grad_norm": 0.0036807521246373653, "learning_rate": 7.38320019365771e-06, "loss": 0.0016, "step": 23880 }, { "epoch": 17.34, "grad_norm": 3.253828253946267e-05, "learning_rate": 7.363027515532962e-06, "loss": 0.0051, "step": 23890 }, { "epoch": 17.35, "grad_norm": 0.00014638539869338274, "learning_rate": 7.342854837408214e-06, "loss": 0.0032, "step": 23900 }, { "epoch": 17.36, "grad_norm": 0.0003724195412360132, "learning_rate": 7.322682159283467e-06, "loss": 0.0017, "step": 23910 }, { "epoch": 17.36, "grad_norm": 0.0679226964712143, "learning_rate": 7.302509481158719e-06, "loss": 0.0016, "step": 23920 }, { "epoch": 17.37, "grad_norm": 0.0009348354651592672, "learning_rate": 7.282336803033971e-06, "loss": 0.0043, "step": 23930 }, { "epoch": 17.38, "grad_norm": 0.0015548918163403869, "learning_rate": 7.262164124909223e-06, "loss": 0.0044, "step": 23940 }, { "epoch": 17.39, "grad_norm": 0.00017984594160225242, "learning_rate": 7.241991446784475e-06, "loss": 0.0017, "step": 23950 }, { "epoch": 17.39, "grad_norm": 4.2534215026535094e-05, "learning_rate": 7.221818768659727e-06, "loss": 0.0069, "step": 23960 }, { "epoch": 17.4, "grad_norm": 0.045961715281009674, "learning_rate": 7.20164609053498e-06, "loss": 0.0036, "step": 23970 }, { "epoch": 17.41, "grad_norm": 0.1038808822631836, "learning_rate": 7.181473412410232e-06, "loss": 0.0028, "step": 23980 }, { "epoch": 17.42, "grad_norm": 0.0011143676238134503, "learning_rate": 7.161300734285484e-06, "loss": 0.01, "step": 23990 }, { "epoch": 17.42, "grad_norm": 0.00020528653112705797, "learning_rate": 7.141128056160737e-06, "loss": 0.0113, "step": 24000 }, { "epoch": 17.43, "grad_norm": 0.1908223181962967, "learning_rate": 7.1209553780359885e-06, "loss": 0.0042, "step": 24010 }, { "epoch": 17.44, "grad_norm": 0.05702119693160057, "learning_rate": 7.1007826999112405e-06, "loss": 0.0025, "step": 24020 }, { "epoch": 17.44, "grad_norm": 0.1563175767660141, "learning_rate": 7.080610021786492e-06, "loss": 0.0039, "step": 24030 }, { "epoch": 17.45, "grad_norm": 0.0011573946103453636, "learning_rate": 7.060437343661745e-06, "loss": 0.0006, "step": 24040 }, { "epoch": 17.46, "grad_norm": 0.019460035488009453, "learning_rate": 7.040264665536997e-06, "loss": 0.0014, "step": 24050 }, { "epoch": 17.47, "grad_norm": 0.0006199249182827771, "learning_rate": 7.020091987412249e-06, "loss": 0.0036, "step": 24060 }, { "epoch": 17.47, "grad_norm": 0.001222978811711073, "learning_rate": 6.999919309287502e-06, "loss": 0.0006, "step": 24070 }, { "epoch": 17.48, "grad_norm": 0.08943016082048416, "learning_rate": 6.979746631162754e-06, "loss": 0.0039, "step": 24080 }, { "epoch": 17.49, "grad_norm": 16.41295623779297, "learning_rate": 6.959573953038006e-06, "loss": 0.0012, "step": 24090 }, { "epoch": 17.5, "grad_norm": 0.029908979311585426, "learning_rate": 6.939401274913258e-06, "loss": 0.0067, "step": 24100 }, { "epoch": 17.5, "grad_norm": 0.0022195398341864347, "learning_rate": 6.91922859678851e-06, "loss": 0.0028, "step": 24110 }, { "epoch": 17.51, "grad_norm": 0.0004878832842223346, "learning_rate": 6.899055918663762e-06, "loss": 0.0047, "step": 24120 }, { "epoch": 17.52, "grad_norm": 0.08152516931295395, "learning_rate": 6.878883240539015e-06, "loss": 0.0051, "step": 24130 }, { "epoch": 17.52, "grad_norm": 6.538970774272457e-05, "learning_rate": 6.858710562414267e-06, "loss": 0.0002, "step": 24140 }, { "epoch": 17.53, "grad_norm": 0.22862771153450012, "learning_rate": 6.838537884289518e-06, "loss": 0.0005, "step": 24150 }, { "epoch": 17.54, "grad_norm": 8.339462280273438, "learning_rate": 6.81836520616477e-06, "loss": 0.0024, "step": 24160 }, { "epoch": 17.55, "grad_norm": 0.10850653797388077, "learning_rate": 6.7981925280400236e-06, "loss": 0.0057, "step": 24170 }, { "epoch": 17.55, "grad_norm": 0.007265112828463316, "learning_rate": 6.778019849915275e-06, "loss": 0.0012, "step": 24180 }, { "epoch": 17.56, "grad_norm": 5.9882444475078955e-05, "learning_rate": 6.7578471717905266e-06, "loss": 0.0034, "step": 24190 }, { "epoch": 17.57, "grad_norm": 8.714703290024772e-05, "learning_rate": 6.73767449366578e-06, "loss": 0.0036, "step": 24200 }, { "epoch": 17.58, "grad_norm": 0.001536114257760346, "learning_rate": 6.717501815541031e-06, "loss": 0.0015, "step": 24210 }, { "epoch": 17.58, "grad_norm": 1.9178543880116194e-05, "learning_rate": 6.697329137416283e-06, "loss": 0.0025, "step": 24220 }, { "epoch": 17.59, "grad_norm": 0.05648133158683777, "learning_rate": 6.677156459291537e-06, "loss": 0.0127, "step": 24230 }, { "epoch": 17.6, "grad_norm": 0.00012539050658233464, "learning_rate": 6.656983781166788e-06, "loss": 0.0045, "step": 24240 }, { "epoch": 17.6, "grad_norm": 0.19643142819404602, "learning_rate": 6.63681110304204e-06, "loss": 0.0073, "step": 24250 }, { "epoch": 17.61, "grad_norm": 0.11744473874568939, "learning_rate": 6.616638424917293e-06, "loss": 0.0096, "step": 24260 }, { "epoch": 17.62, "grad_norm": 1.6758296624175273e-05, "learning_rate": 6.5964657467925445e-06, "loss": 0.0054, "step": 24270 }, { "epoch": 17.63, "grad_norm": 0.0006310921744443476, "learning_rate": 6.576293068667796e-06, "loss": 0.0052, "step": 24280 }, { "epoch": 17.63, "grad_norm": 0.36372512578964233, "learning_rate": 6.556120390543048e-06, "loss": 0.0017, "step": 24290 }, { "epoch": 17.64, "grad_norm": 0.05171734094619751, "learning_rate": 6.535947712418301e-06, "loss": 0.0094, "step": 24300 }, { "epoch": 17.65, "grad_norm": 0.0003291558241471648, "learning_rate": 6.515775034293553e-06, "loss": 0.0031, "step": 24310 }, { "epoch": 17.66, "grad_norm": 0.003046097932383418, "learning_rate": 6.495602356168805e-06, "loss": 0.0009, "step": 24320 }, { "epoch": 17.66, "grad_norm": 0.00018167686357628554, "learning_rate": 6.475429678044058e-06, "loss": 0.0075, "step": 24330 }, { "epoch": 17.67, "grad_norm": 0.026283616200089455, "learning_rate": 6.45525699991931e-06, "loss": 0.002, "step": 24340 }, { "epoch": 17.68, "grad_norm": 0.00010824885976035148, "learning_rate": 6.435084321794562e-06, "loss": 0.0032, "step": 24350 }, { "epoch": 17.68, "grad_norm": 9.676727495389059e-05, "learning_rate": 6.414911643669814e-06, "loss": 0.0015, "step": 24360 }, { "epoch": 17.69, "grad_norm": 3.2388255931437016e-05, "learning_rate": 6.394738965545066e-06, "loss": 0.002, "step": 24370 }, { "epoch": 17.7, "grad_norm": 0.00028173986356705427, "learning_rate": 6.374566287420318e-06, "loss": 0.0004, "step": 24380 }, { "epoch": 17.71, "grad_norm": 2.8843047618865967, "learning_rate": 6.354393609295569e-06, "loss": 0.0035, "step": 24390 }, { "epoch": 17.71, "grad_norm": 0.00011344110680511221, "learning_rate": 6.334220931170823e-06, "loss": 0.0033, "step": 24400 }, { "epoch": 17.72, "grad_norm": 0.00039801959064789116, "learning_rate": 6.314048253046075e-06, "loss": 0.0048, "step": 24410 }, { "epoch": 17.73, "grad_norm": 2.9843920856365003e-05, "learning_rate": 6.293875574921326e-06, "loss": 0.0004, "step": 24420 }, { "epoch": 17.74, "grad_norm": 0.00021989627566654235, "learning_rate": 6.2737028967965795e-06, "loss": 0.0, "step": 24430 }, { "epoch": 17.74, "grad_norm": 0.0002266202645841986, "learning_rate": 6.2535302186718314e-06, "loss": 0.006, "step": 24440 }, { "epoch": 17.75, "grad_norm": 0.00015999180322978646, "learning_rate": 6.233357540547083e-06, "loss": 0.0053, "step": 24450 }, { "epoch": 17.76, "grad_norm": 0.002106759464368224, "learning_rate": 6.213184862422335e-06, "loss": 0.0034, "step": 24460 }, { "epoch": 17.76, "grad_norm": 0.09708841145038605, "learning_rate": 6.193012184297588e-06, "loss": 0.0042, "step": 24470 }, { "epoch": 17.77, "grad_norm": 0.00014056751388125122, "learning_rate": 6.172839506172839e-06, "loss": 0.0088, "step": 24480 }, { "epoch": 17.78, "grad_norm": 0.4134717583656311, "learning_rate": 6.152666828048092e-06, "loss": 0.0063, "step": 24490 }, { "epoch": 17.79, "grad_norm": 0.02683611586689949, "learning_rate": 6.132494149923345e-06, "loss": 0.0025, "step": 24500 }, { "epoch": 17.79, "grad_norm": 0.00020469677110668272, "learning_rate": 6.112321471798596e-06, "loss": 0.0, "step": 24510 }, { "epoch": 17.8, "grad_norm": 0.00013897515600547194, "learning_rate": 6.0921487936738485e-06, "loss": 0.0034, "step": 24520 }, { "epoch": 17.81, "grad_norm": 0.0001812389527913183, "learning_rate": 6.071976115549101e-06, "loss": 0.0014, "step": 24530 }, { "epoch": 17.81, "grad_norm": 9.690736624179408e-05, "learning_rate": 6.051803437424352e-06, "loss": 0.0037, "step": 24540 }, { "epoch": 17.82, "grad_norm": 0.0005624780897051096, "learning_rate": 6.031630759299605e-06, "loss": 0.0115, "step": 24550 }, { "epoch": 17.83, "grad_norm": 0.057843539863824844, "learning_rate": 6.011458081174857e-06, "loss": 0.0024, "step": 24560 }, { "epoch": 17.84, "grad_norm": 0.00023191337822936475, "learning_rate": 5.991285403050109e-06, "loss": 0.0079, "step": 24570 }, { "epoch": 17.84, "grad_norm": 0.00026599192642606795, "learning_rate": 5.971112724925362e-06, "loss": 0.0003, "step": 24580 }, { "epoch": 17.85, "grad_norm": 0.0005074123037047684, "learning_rate": 5.950940046800614e-06, "loss": 0.0011, "step": 24590 }, { "epoch": 17.86, "grad_norm": 0.0019474523141980171, "learning_rate": 5.930767368675866e-06, "loss": 0.0, "step": 24600 }, { "epoch": 17.87, "grad_norm": 0.0010078544728457928, "learning_rate": 5.9105946905511175e-06, "loss": 0.0015, "step": 24610 }, { "epoch": 17.87, "grad_norm": 0.0005764566012658179, "learning_rate": 5.89042201242637e-06, "loss": 0.0007, "step": 24620 }, { "epoch": 17.88, "grad_norm": 0.00041168267489410937, "learning_rate": 5.870249334301622e-06, "loss": 0.0031, "step": 24630 }, { "epoch": 17.89, "grad_norm": 0.000586585549172014, "learning_rate": 5.850076656176874e-06, "loss": 0.0055, "step": 24640 }, { "epoch": 17.89, "grad_norm": 0.0007330170483328402, "learning_rate": 5.829903978052127e-06, "loss": 0.0013, "step": 24650 }, { "epoch": 17.9, "grad_norm": 0.0002643170882947743, "learning_rate": 5.809731299927379e-06, "loss": 0.0022, "step": 24660 }, { "epoch": 17.91, "grad_norm": 0.0039410036988556385, "learning_rate": 5.789558621802631e-06, "loss": 0.0055, "step": 24670 }, { "epoch": 17.92, "grad_norm": 11.958067893981934, "learning_rate": 5.769385943677883e-06, "loss": 0.0058, "step": 24680 }, { "epoch": 17.92, "grad_norm": 0.0001658492983551696, "learning_rate": 5.749213265553135e-06, "loss": 0.007, "step": 24690 }, { "epoch": 17.93, "grad_norm": 0.0005535160889849067, "learning_rate": 5.729040587428387e-06, "loss": 0.0069, "step": 24700 }, { "epoch": 17.94, "grad_norm": 0.002007086528465152, "learning_rate": 5.708867909303639e-06, "loss": 0.0068, "step": 24710 }, { "epoch": 17.95, "grad_norm": 0.00019094701565336436, "learning_rate": 5.688695231178891e-06, "loss": 0.0068, "step": 24720 }, { "epoch": 17.95, "grad_norm": 0.24637742340564728, "learning_rate": 5.668522553054144e-06, "loss": 0.0048, "step": 24730 }, { "epoch": 17.96, "grad_norm": 0.00016859486640896648, "learning_rate": 5.648349874929396e-06, "loss": 0.0035, "step": 24740 }, { "epoch": 17.97, "grad_norm": 0.0005759259220212698, "learning_rate": 5.628177196804648e-06, "loss": 0.0044, "step": 24750 }, { "epoch": 17.97, "grad_norm": 0.007888413034379482, "learning_rate": 5.608004518679901e-06, "loss": 0.0041, "step": 24760 }, { "epoch": 17.98, "grad_norm": 9.520780563354492, "learning_rate": 5.5878318405551526e-06, "loss": 0.0161, "step": 24770 }, { "epoch": 17.99, "grad_norm": 0.08241154253482819, "learning_rate": 5.5676591624304045e-06, "loss": 0.0016, "step": 24780 }, { "epoch": 18.0, "grad_norm": 0.044836148619651794, "learning_rate": 5.547486484305656e-06, "loss": 0.002, "step": 24790 }, { "epoch": 18.0, "eval_accuracy": 0.9983550392521668, "eval_f1": 0.9962040891122804, "eval_loss": 0.0032196117099374533, "eval_precision": 0.9991597962505908, "eval_recall": 0.9932658174984339, "eval_roc_auc": 0.999987405141813, "eval_runtime": 386.475, "eval_samples_per_second": 228.082, "eval_steps_per_second": 14.257, "step": 24795 }, { "epoch": 18.0, "grad_norm": 0.0755479708313942, "learning_rate": 5.527313806180909e-06, "loss": 0.0013, "step": 24800 }, { "epoch": 18.01, "grad_norm": 0.04686315357685089, "learning_rate": 5.507141128056161e-06, "loss": 0.0144, "step": 24810 }, { "epoch": 18.02, "grad_norm": 0.00485340366140008, "learning_rate": 5.486968449931413e-06, "loss": 0.0029, "step": 24820 }, { "epoch": 18.03, "grad_norm": 0.00023718834563624114, "learning_rate": 5.466795771806665e-06, "loss": 0.0, "step": 24830 }, { "epoch": 18.03, "grad_norm": 0.012029669247567654, "learning_rate": 5.446623093681918e-06, "loss": 0.004, "step": 24840 }, { "epoch": 18.04, "grad_norm": 0.006431036163121462, "learning_rate": 5.42645041555717e-06, "loss": 0.0012, "step": 24850 }, { "epoch": 18.05, "grad_norm": 0.07365299761295319, "learning_rate": 5.406277737432422e-06, "loss": 0.003, "step": 24860 }, { "epoch": 18.05, "grad_norm": 0.004044768866151571, "learning_rate": 5.3861050593076735e-06, "loss": 0.001, "step": 24870 }, { "epoch": 18.06, "grad_norm": 0.04818055033683777, "learning_rate": 5.365932381182926e-06, "loss": 0.0006, "step": 24880 }, { "epoch": 18.07, "grad_norm": 7.61866249376908e-05, "learning_rate": 5.345759703058178e-06, "loss": 0.0, "step": 24890 }, { "epoch": 18.08, "grad_norm": 0.0008586979820393026, "learning_rate": 5.32558702493343e-06, "loss": 0.005, "step": 24900 }, { "epoch": 18.08, "grad_norm": 0.00014436905621550977, "learning_rate": 5.305414346808683e-06, "loss": 0.0027, "step": 24910 }, { "epoch": 18.09, "grad_norm": 0.00020209125068504363, "learning_rate": 5.285241668683935e-06, "loss": 0.0054, "step": 24920 }, { "epoch": 18.1, "grad_norm": 0.0005764380330219865, "learning_rate": 5.265068990559187e-06, "loss": 0.0023, "step": 24930 }, { "epoch": 18.11, "grad_norm": 0.00015431991778314114, "learning_rate": 5.2448963124344395e-06, "loss": 0.0045, "step": 24940 }, { "epoch": 18.11, "grad_norm": 8.73160533956252e-05, "learning_rate": 5.224723634309691e-06, "loss": 0.0027, "step": 24950 }, { "epoch": 18.12, "grad_norm": 0.0016183574916794896, "learning_rate": 5.204550956184943e-06, "loss": 0.0022, "step": 24960 }, { "epoch": 18.13, "grad_norm": 0.11282764375209808, "learning_rate": 5.184378278060196e-06, "loss": 0.0106, "step": 24970 }, { "epoch": 18.13, "grad_norm": 0.00025519824703224003, "learning_rate": 5.164205599935447e-06, "loss": 0.0006, "step": 24980 }, { "epoch": 18.14, "grad_norm": 0.0002551107027102262, "learning_rate": 5.1440329218107e-06, "loss": 0.0027, "step": 24990 }, { "epoch": 18.15, "grad_norm": 5.992265505483374e-05, "learning_rate": 5.123860243685952e-06, "loss": 0.0026, "step": 25000 }, { "epoch": 18.16, "grad_norm": 5.242994666332379e-05, "learning_rate": 5.103687565561204e-06, "loss": 0.0023, "step": 25010 }, { "epoch": 18.16, "grad_norm": 0.004187633749097586, "learning_rate": 5.083514887436457e-06, "loss": 0.0039, "step": 25020 }, { "epoch": 18.17, "grad_norm": 0.03268939256668091, "learning_rate": 5.0633422093117085e-06, "loss": 0.0009, "step": 25030 }, { "epoch": 18.18, "grad_norm": 0.02059740573167801, "learning_rate": 5.0431695311869605e-06, "loss": 0.0058, "step": 25040 }, { "epoch": 18.19, "grad_norm": 0.0005777775659225881, "learning_rate": 5.022996853062212e-06, "loss": 0.0055, "step": 25050 }, { "epoch": 18.19, "grad_norm": 0.00028460632893256843, "learning_rate": 5.002824174937465e-06, "loss": 0.0072, "step": 25060 }, { "epoch": 18.2, "grad_norm": 0.00011931911285500973, "learning_rate": 4.982651496812717e-06, "loss": 0.0013, "step": 25070 }, { "epoch": 18.21, "grad_norm": 0.028543755412101746, "learning_rate": 4.962478818687969e-06, "loss": 0.0058, "step": 25080 }, { "epoch": 18.21, "grad_norm": 0.06952419131994247, "learning_rate": 4.942306140563222e-06, "loss": 0.0012, "step": 25090 }, { "epoch": 18.22, "grad_norm": 0.0011916140792891383, "learning_rate": 4.922133462438473e-06, "loss": 0.0028, "step": 25100 }, { "epoch": 18.23, "grad_norm": 0.0002467480080667883, "learning_rate": 4.901960784313726e-06, "loss": 0.0053, "step": 25110 }, { "epoch": 18.24, "grad_norm": 0.0004054481105413288, "learning_rate": 4.881788106188978e-06, "loss": 0.0005, "step": 25120 }, { "epoch": 18.24, "grad_norm": 8.49825592013076e-05, "learning_rate": 4.8616154280642295e-06, "loss": 0.0045, "step": 25130 }, { "epoch": 18.25, "grad_norm": 0.11147330701351166, "learning_rate": 4.841442749939482e-06, "loss": 0.0017, "step": 25140 }, { "epoch": 18.26, "grad_norm": 0.0001451838470529765, "learning_rate": 4.821270071814735e-06, "loss": 0.0044, "step": 25150 }, { "epoch": 18.26, "grad_norm": 0.032486509531736374, "learning_rate": 4.801097393689986e-06, "loss": 0.0029, "step": 25160 }, { "epoch": 18.27, "grad_norm": 0.09991168230772018, "learning_rate": 4.780924715565239e-06, "loss": 0.0026, "step": 25170 }, { "epoch": 18.28, "grad_norm": 0.0005969495978206396, "learning_rate": 4.760752037440491e-06, "loss": 0.0037, "step": 25180 }, { "epoch": 18.29, "grad_norm": 0.7579265236854553, "learning_rate": 4.740579359315743e-06, "loss": 0.0061, "step": 25190 }, { "epoch": 18.29, "grad_norm": 0.11252560466527939, "learning_rate": 4.7204066811909955e-06, "loss": 0.0023, "step": 25200 }, { "epoch": 18.3, "grad_norm": 0.00014016269415151328, "learning_rate": 4.700234003066247e-06, "loss": 0.0018, "step": 25210 }, { "epoch": 18.31, "grad_norm": 0.09807706624269485, "learning_rate": 4.680061324941499e-06, "loss": 0.0042, "step": 25220 }, { "epoch": 18.32, "grad_norm": 0.1528569459915161, "learning_rate": 4.659888646816751e-06, "loss": 0.0058, "step": 25230 }, { "epoch": 18.32, "grad_norm": 3.9896687667351216e-05, "learning_rate": 4.639715968692004e-06, "loss": 0.0052, "step": 25240 }, { "epoch": 18.33, "grad_norm": 0.040525808930397034, "learning_rate": 4.619543290567256e-06, "loss": 0.0044, "step": 25250 }, { "epoch": 18.34, "grad_norm": 0.04245885834097862, "learning_rate": 4.599370612442508e-06, "loss": 0.0131, "step": 25260 }, { "epoch": 18.34, "grad_norm": 7.739640568615869e-05, "learning_rate": 4.579197934317761e-06, "loss": 0.0012, "step": 25270 }, { "epoch": 18.35, "grad_norm": 0.00031783172744326293, "learning_rate": 4.5590252561930126e-06, "loss": 0.0037, "step": 25280 }, { "epoch": 18.36, "grad_norm": 0.053188130259513855, "learning_rate": 4.5388525780682645e-06, "loss": 0.0017, "step": 25290 }, { "epoch": 18.37, "grad_norm": 0.0014969066251069307, "learning_rate": 4.518679899943517e-06, "loss": 0.0066, "step": 25300 }, { "epoch": 18.37, "grad_norm": 0.14414730668067932, "learning_rate": 4.498507221818768e-06, "loss": 0.0023, "step": 25310 }, { "epoch": 18.38, "grad_norm": 0.00036431997432373464, "learning_rate": 4.478334543694021e-06, "loss": 0.0004, "step": 25320 }, { "epoch": 18.39, "grad_norm": 0.0006099030142650008, "learning_rate": 4.458161865569274e-06, "loss": 0.0041, "step": 25330 }, { "epoch": 18.4, "grad_norm": 0.0006587031530216336, "learning_rate": 4.437989187444525e-06, "loss": 0.0031, "step": 25340 }, { "epoch": 18.4, "grad_norm": 0.13671369850635529, "learning_rate": 4.417816509319778e-06, "loss": 0.0041, "step": 25350 }, { "epoch": 18.41, "grad_norm": 0.0011476201470941305, "learning_rate": 4.39764383119503e-06, "loss": 0.0093, "step": 25360 }, { "epoch": 18.42, "grad_norm": 0.03173692151904106, "learning_rate": 4.377471153070282e-06, "loss": 0.0039, "step": 25370 }, { "epoch": 18.42, "grad_norm": 2.042336382146459e-05, "learning_rate": 4.357298474945534e-06, "loss": 0.0035, "step": 25380 }, { "epoch": 18.43, "grad_norm": 0.0008917959057725966, "learning_rate": 4.337125796820786e-06, "loss": 0.0042, "step": 25390 }, { "epoch": 18.44, "grad_norm": 0.04398633912205696, "learning_rate": 4.316953118696038e-06, "loss": 0.0016, "step": 25400 }, { "epoch": 18.45, "grad_norm": 0.0733947828412056, "learning_rate": 4.29678044057129e-06, "loss": 0.0044, "step": 25410 }, { "epoch": 18.45, "grad_norm": 19.26169776916504, "learning_rate": 4.276607762446543e-06, "loss": 0.0123, "step": 25420 }, { "epoch": 18.46, "grad_norm": 0.09142892807722092, "learning_rate": 4.256435084321795e-06, "loss": 0.0041, "step": 25430 }, { "epoch": 18.47, "grad_norm": 0.006276868283748627, "learning_rate": 4.236262406197047e-06, "loss": 0.0035, "step": 25440 }, { "epoch": 18.48, "grad_norm": 0.035533662885427475, "learning_rate": 4.2160897280722995e-06, "loss": 0.0023, "step": 25450 }, { "epoch": 18.48, "grad_norm": 0.00048704142682254314, "learning_rate": 4.1959170499475514e-06, "loss": 0.0045, "step": 25460 }, { "epoch": 18.49, "grad_norm": 1.3611451387405396, "learning_rate": 4.175744371822803e-06, "loss": 0.0065, "step": 25470 }, { "epoch": 18.5, "grad_norm": 0.1825874298810959, "learning_rate": 4.155571693698055e-06, "loss": 0.0021, "step": 25480 }, { "epoch": 18.5, "grad_norm": 0.11230570822954178, "learning_rate": 4.135399015573307e-06, "loss": 0.0077, "step": 25490 }, { "epoch": 18.51, "grad_norm": 0.04604954272508621, "learning_rate": 4.11522633744856e-06, "loss": 0.0046, "step": 25500 }, { "epoch": 18.52, "grad_norm": 0.00013642838166560978, "learning_rate": 4.095053659323812e-06, "loss": 0.0021, "step": 25510 }, { "epoch": 18.53, "grad_norm": 4.127192369196564e-05, "learning_rate": 4.074880981199064e-06, "loss": 0.008, "step": 25520 }, { "epoch": 18.53, "grad_norm": 0.0006564307259395719, "learning_rate": 4.054708303074317e-06, "loss": 0.0053, "step": 25530 }, { "epoch": 18.54, "grad_norm": 5.794021853944287e-05, "learning_rate": 4.0345356249495685e-06, "loss": 0.0004, "step": 25540 }, { "epoch": 18.55, "grad_norm": 0.003438483690842986, "learning_rate": 4.0143629468248205e-06, "loss": 0.005, "step": 25550 }, { "epoch": 18.56, "grad_norm": 0.00017284188652411103, "learning_rate": 3.994190268700073e-06, "loss": 0.0028, "step": 25560 }, { "epoch": 18.56, "grad_norm": 0.00043344812002032995, "learning_rate": 3.974017590575325e-06, "loss": 0.0009, "step": 25570 }, { "epoch": 18.57, "grad_norm": 3.957462831749581e-05, "learning_rate": 3.953844912450577e-06, "loss": 0.0033, "step": 25580 }, { "epoch": 18.58, "grad_norm": 0.08866414427757263, "learning_rate": 3.93367223432583e-06, "loss": 0.0011, "step": 25590 }, { "epoch": 18.58, "grad_norm": 0.000915550219360739, "learning_rate": 3.913499556201082e-06, "loss": 0.0028, "step": 25600 }, { "epoch": 18.59, "grad_norm": 5.103804141981527e-05, "learning_rate": 3.893326878076334e-06, "loss": 0.0009, "step": 25610 }, { "epoch": 18.6, "grad_norm": 0.21407219767570496, "learning_rate": 3.873154199951586e-06, "loss": 0.0037, "step": 25620 }, { "epoch": 18.61, "grad_norm": 0.12557661533355713, "learning_rate": 3.8529815218268376e-06, "loss": 0.0067, "step": 25630 }, { "epoch": 18.61, "grad_norm": 0.004426570143550634, "learning_rate": 3.83280884370209e-06, "loss": 0.0013, "step": 25640 }, { "epoch": 18.62, "grad_norm": 0.0005717293825000525, "learning_rate": 3.8126361655773422e-06, "loss": 0.0113, "step": 25650 }, { "epoch": 18.63, "grad_norm": 0.1541978120803833, "learning_rate": 3.7924634874525946e-06, "loss": 0.0131, "step": 25660 }, { "epoch": 18.64, "grad_norm": 0.0027115640696138144, "learning_rate": 3.772290809327846e-06, "loss": 0.0037, "step": 25670 }, { "epoch": 18.64, "grad_norm": 7.32510961825028e-05, "learning_rate": 3.752118131203099e-06, "loss": 0.0075, "step": 25680 }, { "epoch": 18.65, "grad_norm": 0.0003670882142614573, "learning_rate": 3.7319454530783512e-06, "loss": 0.0009, "step": 25690 }, { "epoch": 18.66, "grad_norm": 3.481513704173267e-05, "learning_rate": 3.7117727749536027e-06, "loss": 0.0057, "step": 25700 }, { "epoch": 18.66, "grad_norm": 0.00047917303163558245, "learning_rate": 3.691600096828855e-06, "loss": 0.0032, "step": 25710 }, { "epoch": 18.67, "grad_norm": 0.0021459930576384068, "learning_rate": 3.671427418704107e-06, "loss": 0.003, "step": 25720 }, { "epoch": 18.68, "grad_norm": 0.0027416530065238476, "learning_rate": 3.6512547405793593e-06, "loss": 0.0011, "step": 25730 }, { "epoch": 18.69, "grad_norm": 0.05313531309366226, "learning_rate": 3.6310820624546117e-06, "loss": 0.0027, "step": 25740 }, { "epoch": 18.69, "grad_norm": 0.16513313353061676, "learning_rate": 3.6109093843298636e-06, "loss": 0.0054, "step": 25750 }, { "epoch": 18.7, "grad_norm": 0.003389935242012143, "learning_rate": 3.590736706205116e-06, "loss": 0.005, "step": 25760 }, { "epoch": 18.71, "grad_norm": 0.0002206418284913525, "learning_rate": 3.5705640280803683e-06, "loss": 0.0035, "step": 25770 }, { "epoch": 18.72, "grad_norm": 0.0001424114016117528, "learning_rate": 3.5503913499556202e-06, "loss": 0.0064, "step": 25780 }, { "epoch": 18.72, "grad_norm": 0.032624099403619766, "learning_rate": 3.5302186718308726e-06, "loss": 0.0027, "step": 25790 }, { "epoch": 18.73, "grad_norm": 0.0008412267197854817, "learning_rate": 3.5100459937061245e-06, "loss": 0.0014, "step": 25800 }, { "epoch": 18.74, "grad_norm": 0.04843832924962044, "learning_rate": 3.489873315581377e-06, "loss": 0.0024, "step": 25810 }, { "epoch": 18.74, "grad_norm": 0.056923747062683105, "learning_rate": 3.469700637456629e-06, "loss": 0.0022, "step": 25820 }, { "epoch": 18.75, "grad_norm": 0.0007223181310109794, "learning_rate": 3.449527959331881e-06, "loss": 0.0046, "step": 25830 }, { "epoch": 18.76, "grad_norm": 0.0008658911683596671, "learning_rate": 3.4293552812071335e-06, "loss": 0.0019, "step": 25840 }, { "epoch": 18.77, "grad_norm": 0.00013374777336139232, "learning_rate": 3.409182603082385e-06, "loss": 0.0021, "step": 25850 }, { "epoch": 18.77, "grad_norm": 0.0037013725377619267, "learning_rate": 3.3890099249576373e-06, "loss": 0.0085, "step": 25860 }, { "epoch": 18.78, "grad_norm": 0.0005560120334848762, "learning_rate": 3.36883724683289e-06, "loss": 0.0059, "step": 25870 }, { "epoch": 18.79, "grad_norm": 0.003002674551680684, "learning_rate": 3.3486645687081416e-06, "loss": 0.0031, "step": 25880 }, { "epoch": 18.79, "grad_norm": 0.0007660058909095824, "learning_rate": 3.328491890583394e-06, "loss": 0.0029, "step": 25890 }, { "epoch": 18.8, "grad_norm": 0.00030720618087798357, "learning_rate": 3.3083192124586467e-06, "loss": 0.0004, "step": 25900 }, { "epoch": 18.81, "grad_norm": 0.00031959637999534607, "learning_rate": 3.288146534333898e-06, "loss": 0.0027, "step": 25910 }, { "epoch": 18.82, "grad_norm": 0.0005406465497799218, "learning_rate": 3.2679738562091506e-06, "loss": 0.0016, "step": 25920 }, { "epoch": 18.82, "grad_norm": 0.0006833134684711695, "learning_rate": 3.2478011780844025e-06, "loss": 0.0013, "step": 25930 }, { "epoch": 18.83, "grad_norm": 6.237076013348997e-05, "learning_rate": 3.227628499959655e-06, "loss": 0.0039, "step": 25940 }, { "epoch": 18.84, "grad_norm": 0.00028509943513199687, "learning_rate": 3.207455821834907e-06, "loss": 0.0013, "step": 25950 }, { "epoch": 18.85, "grad_norm": 0.05667322129011154, "learning_rate": 3.187283143710159e-06, "loss": 0.0011, "step": 25960 }, { "epoch": 18.85, "grad_norm": 0.00010201996337855235, "learning_rate": 3.1671104655854115e-06, "loss": 0.0021, "step": 25970 }, { "epoch": 18.86, "grad_norm": 0.0016317203408107162, "learning_rate": 3.146937787460663e-06, "loss": 0.0009, "step": 25980 }, { "epoch": 18.87, "grad_norm": 0.0036815290804952383, "learning_rate": 3.1267651093359157e-06, "loss": 0.0085, "step": 25990 }, { "epoch": 18.87, "grad_norm": 0.027603503316640854, "learning_rate": 3.1065924312111676e-06, "loss": 0.0007, "step": 26000 }, { "epoch": 18.88, "grad_norm": 0.02139933593571186, "learning_rate": 3.0864197530864196e-06, "loss": 0.0056, "step": 26010 }, { "epoch": 18.89, "grad_norm": 8.762301149545237e-05, "learning_rate": 3.0662470749616723e-06, "loss": 0.0031, "step": 26020 }, { "epoch": 18.9, "grad_norm": 0.00036692939465865493, "learning_rate": 3.0460743968369243e-06, "loss": 0.0017, "step": 26030 }, { "epoch": 18.9, "grad_norm": 0.10278957337141037, "learning_rate": 3.025901718712176e-06, "loss": 0.0057, "step": 26040 }, { "epoch": 18.91, "grad_norm": 5.523693471332081e-05, "learning_rate": 3.0057290405874285e-06, "loss": 0.0004, "step": 26050 }, { "epoch": 18.92, "grad_norm": 0.0008847813005559146, "learning_rate": 2.985556362462681e-06, "loss": 0.0003, "step": 26060 }, { "epoch": 18.93, "grad_norm": 5.930370025453158e-05, "learning_rate": 2.965383684337933e-06, "loss": 0.0007, "step": 26070 }, { "epoch": 18.93, "grad_norm": 0.002461926778778434, "learning_rate": 2.945211006213185e-06, "loss": 0.0, "step": 26080 }, { "epoch": 18.94, "grad_norm": 0.0001589566090842709, "learning_rate": 2.925038328088437e-06, "loss": 0.0038, "step": 26090 }, { "epoch": 18.95, "grad_norm": 0.0006895341211929917, "learning_rate": 2.9048656499636894e-06, "loss": 0.0037, "step": 26100 }, { "epoch": 18.95, "grad_norm": 3.800967169809155e-05, "learning_rate": 2.8846929718389414e-06, "loss": 0.002, "step": 26110 }, { "epoch": 18.96, "grad_norm": 0.007250432390719652, "learning_rate": 2.8645202937141937e-06, "loss": 0.0004, "step": 26120 }, { "epoch": 18.97, "grad_norm": 0.0022204366978257895, "learning_rate": 2.8443476155894456e-06, "loss": 0.0108, "step": 26130 }, { "epoch": 18.98, "grad_norm": 3.95225033571478e-05, "learning_rate": 2.824174937464698e-06, "loss": 0.0032, "step": 26140 }, { "epoch": 18.98, "grad_norm": 0.332366406917572, "learning_rate": 2.8040022593399503e-06, "loss": 0.0072, "step": 26150 }, { "epoch": 18.99, "grad_norm": 3.236500560888089e-05, "learning_rate": 2.7838295812152022e-06, "loss": 0.0011, "step": 26160 }, { "epoch": 19.0, "grad_norm": 0.00018790685862768441, "learning_rate": 2.7636569030904546e-06, "loss": 0.0024, "step": 26170 }, { "epoch": 19.0, "eval_accuracy": 0.99837772836593, "eval_f1": 0.9962558584033724, "eval_loss": 0.0031478386372327805, "eval_precision": 0.9993696485790828, "eval_recall": 0.993161411568177, "eval_roc_auc": 0.9999876608903375, "eval_runtime": 387.535, "eval_samples_per_second": 227.458, "eval_steps_per_second": 14.218, "step": 26172 }, { "epoch": 19.01, "grad_norm": 0.03183314949274063, "learning_rate": 2.7434842249657065e-06, "loss": 0.0072, "step": 26180 }, { "epoch": 19.01, "grad_norm": 0.0002327613183297217, "learning_rate": 2.723311546840959e-06, "loss": 0.0049, "step": 26190 }, { "epoch": 19.02, "grad_norm": 0.0012804355937987566, "learning_rate": 2.703138868716211e-06, "loss": 0.0102, "step": 26200 }, { "epoch": 19.03, "grad_norm": 0.0003078650333918631, "learning_rate": 2.682966190591463e-06, "loss": 0.003, "step": 26210 }, { "epoch": 19.03, "grad_norm": 0.09908101707696915, "learning_rate": 2.662793512466715e-06, "loss": 0.009, "step": 26220 }, { "epoch": 19.04, "grad_norm": 0.09956044703722, "learning_rate": 2.6426208343419674e-06, "loss": 0.0048, "step": 26230 }, { "epoch": 19.05, "grad_norm": 0.00046755580115132034, "learning_rate": 2.6224481562172198e-06, "loss": 0.0025, "step": 26240 }, { "epoch": 19.06, "grad_norm": 3.5852412111125886e-05, "learning_rate": 2.6022754780924717e-06, "loss": 0.0, "step": 26250 }, { "epoch": 19.06, "grad_norm": 0.00015138008166104555, "learning_rate": 2.5821027999677236e-06, "loss": 0.002, "step": 26260 }, { "epoch": 19.07, "grad_norm": 0.00530798826366663, "learning_rate": 2.561930121842976e-06, "loss": 0.0015, "step": 26270 }, { "epoch": 19.08, "grad_norm": 0.0002312654396519065, "learning_rate": 2.5417574437182283e-06, "loss": 0.0004, "step": 26280 }, { "epoch": 19.09, "grad_norm": 0.0009702076204121113, "learning_rate": 2.5215847655934802e-06, "loss": 0.0054, "step": 26290 }, { "epoch": 19.09, "grad_norm": 6.126202788436785e-05, "learning_rate": 2.5014120874687326e-06, "loss": 0.001, "step": 26300 }, { "epoch": 19.1, "grad_norm": 1.3822760581970215, "learning_rate": 2.4812394093439845e-06, "loss": 0.0025, "step": 26310 }, { "epoch": 19.11, "grad_norm": 0.049456097185611725, "learning_rate": 2.4610667312192364e-06, "loss": 0.0023, "step": 26320 }, { "epoch": 19.11, "grad_norm": 0.00021031413052696735, "learning_rate": 2.440894053094489e-06, "loss": 0.0016, "step": 26330 }, { "epoch": 19.12, "grad_norm": 0.04800700768828392, "learning_rate": 2.420721374969741e-06, "loss": 0.0021, "step": 26340 }, { "epoch": 19.13, "grad_norm": 0.009497404098510742, "learning_rate": 2.400548696844993e-06, "loss": 0.003, "step": 26350 }, { "epoch": 19.14, "grad_norm": 0.04641493409872055, "learning_rate": 2.3803760187202454e-06, "loss": 0.0017, "step": 26360 }, { "epoch": 19.14, "grad_norm": 9.963675984181464e-05, "learning_rate": 2.3602033405954977e-06, "loss": 0.0017, "step": 26370 }, { "epoch": 19.15, "grad_norm": 0.001153616583906114, "learning_rate": 2.3400306624707497e-06, "loss": 0.0041, "step": 26380 }, { "epoch": 19.16, "grad_norm": 0.0001510605070507154, "learning_rate": 2.319857984346002e-06, "loss": 0.0007, "step": 26390 }, { "epoch": 19.17, "grad_norm": 7.923934754217044e-05, "learning_rate": 2.299685306221254e-06, "loss": 0.0076, "step": 26400 }, { "epoch": 19.17, "grad_norm": 0.00012218714982736856, "learning_rate": 2.2795126280965063e-06, "loss": 0.0021, "step": 26410 }, { "epoch": 19.18, "grad_norm": 0.0008200127049349248, "learning_rate": 2.2593399499717586e-06, "loss": 0.0035, "step": 26420 }, { "epoch": 19.19, "grad_norm": 0.05239934101700783, "learning_rate": 2.2391672718470106e-06, "loss": 0.0025, "step": 26430 }, { "epoch": 19.19, "grad_norm": 3.865266262437217e-05, "learning_rate": 2.2189945937222625e-06, "loss": 0.0018, "step": 26440 }, { "epoch": 19.2, "grad_norm": 8.964262815425172e-05, "learning_rate": 2.198821915597515e-06, "loss": 0.0041, "step": 26450 }, { "epoch": 19.21, "grad_norm": 0.0628039538860321, "learning_rate": 2.178649237472767e-06, "loss": 0.0062, "step": 26460 }, { "epoch": 19.22, "grad_norm": 0.048729509115219116, "learning_rate": 2.158476559348019e-06, "loss": 0.0011, "step": 26470 }, { "epoch": 19.22, "grad_norm": 0.00014290747640188783, "learning_rate": 2.1383038812232715e-06, "loss": 0.0031, "step": 26480 }, { "epoch": 19.23, "grad_norm": 0.0006844107992947102, "learning_rate": 2.1181312030985234e-06, "loss": 0.0025, "step": 26490 }, { "epoch": 19.24, "grad_norm": 0.014656140469014645, "learning_rate": 2.0979585249737757e-06, "loss": 0.0143, "step": 26500 }, { "epoch": 19.25, "grad_norm": 0.0003018031711690128, "learning_rate": 2.0777858468490276e-06, "loss": 0.0038, "step": 26510 }, { "epoch": 19.25, "grad_norm": 6.837755790911615e-05, "learning_rate": 2.05761316872428e-06, "loss": 0.006, "step": 26520 }, { "epoch": 19.26, "grad_norm": 0.09663214534521103, "learning_rate": 2.037440490599532e-06, "loss": 0.0061, "step": 26530 }, { "epoch": 19.27, "grad_norm": 0.002400600351393223, "learning_rate": 2.0172678124747843e-06, "loss": 0.0026, "step": 26540 }, { "epoch": 19.27, "grad_norm": 0.046237923204898834, "learning_rate": 1.9970951343500366e-06, "loss": 0.0031, "step": 26550 }, { "epoch": 19.28, "grad_norm": 9.103987395064905e-05, "learning_rate": 1.9769224562252885e-06, "loss": 0.0008, "step": 26560 }, { "epoch": 19.29, "grad_norm": 0.02744593285024166, "learning_rate": 1.956749778100541e-06, "loss": 0.0009, "step": 26570 }, { "epoch": 19.3, "grad_norm": 0.00013158208457753062, "learning_rate": 1.936577099975793e-06, "loss": 0.0046, "step": 26580 }, { "epoch": 19.3, "grad_norm": 3.296025897725485e-05, "learning_rate": 1.916404421851045e-06, "loss": 0.0047, "step": 26590 }, { "epoch": 19.31, "grad_norm": 0.0005842273822054267, "learning_rate": 1.8962317437262973e-06, "loss": 0.0044, "step": 26600 }, { "epoch": 19.32, "grad_norm": 0.0005419492954388261, "learning_rate": 1.8760590656015494e-06, "loss": 0.0014, "step": 26610 }, { "epoch": 19.32, "grad_norm": 0.00013932572619523853, "learning_rate": 1.8558863874768014e-06, "loss": 0.0, "step": 26620 }, { "epoch": 19.33, "grad_norm": 0.03754022344946861, "learning_rate": 1.8357137093520535e-06, "loss": 0.0092, "step": 26630 }, { "epoch": 19.34, "grad_norm": 68.23185729980469, "learning_rate": 1.8155410312273058e-06, "loss": 0.0017, "step": 26640 }, { "epoch": 19.35, "grad_norm": 0.0001436081511201337, "learning_rate": 1.795368353102558e-06, "loss": 0.0023, "step": 26650 }, { "epoch": 19.35, "grad_norm": 0.002541495719924569, "learning_rate": 1.7751956749778101e-06, "loss": 0.0013, "step": 26660 }, { "epoch": 19.36, "grad_norm": 0.146305650472641, "learning_rate": 1.7550229968530623e-06, "loss": 0.0059, "step": 26670 }, { "epoch": 19.37, "grad_norm": 0.09827929735183716, "learning_rate": 1.7348503187283146e-06, "loss": 0.0024, "step": 26680 }, { "epoch": 19.38, "grad_norm": 0.005435886327177286, "learning_rate": 1.7146776406035667e-06, "loss": 0.0049, "step": 26690 }, { "epoch": 19.38, "grad_norm": 0.00013977414346300066, "learning_rate": 1.6945049624788187e-06, "loss": 0.0009, "step": 26700 }, { "epoch": 19.39, "grad_norm": 0.0022497973404824734, "learning_rate": 1.6743322843540708e-06, "loss": 0.0031, "step": 26710 }, { "epoch": 19.4, "grad_norm": 0.0001336606073891744, "learning_rate": 1.6541596062293234e-06, "loss": 0.0049, "step": 26720 }, { "epoch": 19.4, "grad_norm": 0.1333574801683426, "learning_rate": 1.6339869281045753e-06, "loss": 0.0085, "step": 26730 }, { "epoch": 19.41, "grad_norm": 0.00023709374363534153, "learning_rate": 1.6138142499798274e-06, "loss": 0.0055, "step": 26740 }, { "epoch": 19.42, "grad_norm": 0.030887536704540253, "learning_rate": 1.5936415718550796e-06, "loss": 0.0025, "step": 26750 }, { "epoch": 19.43, "grad_norm": 0.0002531045174691826, "learning_rate": 1.5734688937303315e-06, "loss": 0.0048, "step": 26760 }, { "epoch": 19.43, "grad_norm": 9.092326217796654e-05, "learning_rate": 1.5532962156055838e-06, "loss": 0.0003, "step": 26770 }, { "epoch": 19.44, "grad_norm": 0.09485254436731339, "learning_rate": 1.5331235374808362e-06, "loss": 0.0018, "step": 26780 }, { "epoch": 19.45, "grad_norm": 0.00014884411939419806, "learning_rate": 1.512950859356088e-06, "loss": 0.0024, "step": 26790 }, { "epoch": 19.46, "grad_norm": 0.1054830327630043, "learning_rate": 1.4927781812313404e-06, "loss": 0.0056, "step": 26800 }, { "epoch": 19.46, "grad_norm": 0.00028030065004713833, "learning_rate": 1.4726055031065926e-06, "loss": 0.0004, "step": 26810 }, { "epoch": 19.47, "grad_norm": 0.07415413856506348, "learning_rate": 1.4524328249818447e-06, "loss": 0.0062, "step": 26820 }, { "epoch": 19.48, "grad_norm": 0.000137577997520566, "learning_rate": 1.4322601468570969e-06, "loss": 0.001, "step": 26830 }, { "epoch": 19.48, "grad_norm": 0.14892277121543884, "learning_rate": 1.412087468732349e-06, "loss": 0.0072, "step": 26840 }, { "epoch": 19.49, "grad_norm": 0.10421720147132874, "learning_rate": 1.3919147906076011e-06, "loss": 0.0066, "step": 26850 }, { "epoch": 19.5, "grad_norm": 0.00022085083764977753, "learning_rate": 1.3717421124828533e-06, "loss": 0.0005, "step": 26860 }, { "epoch": 19.51, "grad_norm": 0.05538201332092285, "learning_rate": 1.3515694343581054e-06, "loss": 0.0041, "step": 26870 }, { "epoch": 19.51, "grad_norm": 0.03088965266942978, "learning_rate": 1.3313967562333575e-06, "loss": 0.0065, "step": 26880 }, { "epoch": 19.52, "grad_norm": 0.00028383126482367516, "learning_rate": 1.3112240781086099e-06, "loss": 0.0015, "step": 26890 }, { "epoch": 19.53, "grad_norm": 0.00039556692354381084, "learning_rate": 1.2910513999838618e-06, "loss": 0.0021, "step": 26900 }, { "epoch": 19.54, "grad_norm": 0.0002842575777322054, "learning_rate": 1.2708787218591142e-06, "loss": 0.0041, "step": 26910 }, { "epoch": 19.54, "grad_norm": 0.10765402764081955, "learning_rate": 1.2507060437343663e-06, "loss": 0.0022, "step": 26920 }, { "epoch": 19.55, "grad_norm": 0.0026703434996306896, "learning_rate": 1.2305333656096182e-06, "loss": 0.0026, "step": 26930 }, { "epoch": 19.56, "grad_norm": 0.00041996565414592624, "learning_rate": 1.2103606874848706e-06, "loss": 0.0023, "step": 26940 }, { "epoch": 19.56, "grad_norm": 0.0009542067418806255, "learning_rate": 1.1901880093601227e-06, "loss": 0.0033, "step": 26950 }, { "epoch": 19.57, "grad_norm": 0.00019776627596002072, "learning_rate": 1.1700153312353748e-06, "loss": 0.0025, "step": 26960 }, { "epoch": 19.58, "grad_norm": 0.0005681757465936244, "learning_rate": 1.149842653110627e-06, "loss": 0.0045, "step": 26970 }, { "epoch": 19.59, "grad_norm": 0.09003600478172302, "learning_rate": 1.1296699749858793e-06, "loss": 0.0034, "step": 26980 }, { "epoch": 19.59, "grad_norm": 4.978038850822486e-05, "learning_rate": 1.1094972968611312e-06, "loss": 0.0007, "step": 26990 }, { "epoch": 19.6, "grad_norm": 6.065259367460385e-05, "learning_rate": 1.0893246187363836e-06, "loss": 0.0038, "step": 27000 }, { "epoch": 19.61, "grad_norm": 0.00032748805824667215, "learning_rate": 1.0691519406116357e-06, "loss": 0.0044, "step": 27010 }, { "epoch": 19.62, "grad_norm": 0.14997267723083496, "learning_rate": 1.0489792624868879e-06, "loss": 0.0059, "step": 27020 }, { "epoch": 19.62, "grad_norm": 0.0001299285504501313, "learning_rate": 1.02880658436214e-06, "loss": 0.0044, "step": 27030 }, { "epoch": 19.63, "grad_norm": 0.0002876422367990017, "learning_rate": 1.0086339062373921e-06, "loss": 0.0017, "step": 27040 }, { "epoch": 19.64, "grad_norm": 2.906100417021662e-05, "learning_rate": 9.884612281126443e-07, "loss": 0.0036, "step": 27050 }, { "epoch": 19.64, "grad_norm": 0.000504647905472666, "learning_rate": 9.682885499878964e-07, "loss": 0.0021, "step": 27060 }, { "epoch": 19.65, "grad_norm": 0.00014128659677226096, "learning_rate": 9.481158718631486e-07, "loss": 0.0044, "step": 27070 }, { "epoch": 19.66, "grad_norm": 0.056433625519275665, "learning_rate": 9.279431937384007e-07, "loss": 0.0039, "step": 27080 }, { "epoch": 19.67, "grad_norm": 2.6254705517203547e-05, "learning_rate": 9.077705156136529e-07, "loss": 0.0012, "step": 27090 }, { "epoch": 19.67, "grad_norm": 0.0007919368799775839, "learning_rate": 8.875978374889051e-07, "loss": 0.0006, "step": 27100 }, { "epoch": 19.68, "grad_norm": 0.12109678238630295, "learning_rate": 8.674251593641573e-07, "loss": 0.0053, "step": 27110 }, { "epoch": 19.69, "grad_norm": 0.0010482355719432235, "learning_rate": 8.472524812394093e-07, "loss": 0.0039, "step": 27120 }, { "epoch": 19.7, "grad_norm": 0.11032991856336594, "learning_rate": 8.270798031146617e-07, "loss": 0.0013, "step": 27130 }, { "epoch": 19.7, "grad_norm": 0.0005860592355020344, "learning_rate": 8.069071249899137e-07, "loss": 0.002, "step": 27140 }, { "epoch": 19.71, "grad_norm": 0.0007409679819829762, "learning_rate": 7.867344468651657e-07, "loss": 0.0012, "step": 27150 }, { "epoch": 19.72, "grad_norm": 0.13628806173801422, "learning_rate": 7.665617687404181e-07, "loss": 0.0068, "step": 27160 }, { "epoch": 19.72, "grad_norm": 4.722082303487696e-05, "learning_rate": 7.463890906156702e-07, "loss": 0.0014, "step": 27170 }, { "epoch": 19.73, "grad_norm": 0.01145484484732151, "learning_rate": 7.262164124909224e-07, "loss": 0.0011, "step": 27180 }, { "epoch": 19.74, "grad_norm": 3.4825185139197856e-05, "learning_rate": 7.060437343661745e-07, "loss": 0.0032, "step": 27190 }, { "epoch": 19.75, "grad_norm": 0.03905890882015228, "learning_rate": 6.858710562414266e-07, "loss": 0.0013, "step": 27200 }, { "epoch": 19.75, "grad_norm": 6.594491424039006e-05, "learning_rate": 6.656983781166788e-07, "loss": 0.0007, "step": 27210 }, { "epoch": 19.76, "grad_norm": 0.17883096635341644, "learning_rate": 6.455256999919309e-07, "loss": 0.0057, "step": 27220 }, { "epoch": 19.77, "grad_norm": 1.921937109727878e-05, "learning_rate": 6.253530218671831e-07, "loss": 0.0017, "step": 27230 }, { "epoch": 19.77, "grad_norm": 0.00039736239705234766, "learning_rate": 6.051803437424353e-07, "loss": 0.0073, "step": 27240 }, { "epoch": 19.78, "grad_norm": 0.006403313484042883, "learning_rate": 5.850076656176874e-07, "loss": 0.0032, "step": 27250 }, { "epoch": 19.79, "grad_norm": 0.0003711440076585859, "learning_rate": 5.648349874929397e-07, "loss": 0.0011, "step": 27260 }, { "epoch": 19.8, "grad_norm": 0.00021775624190922827, "learning_rate": 5.446623093681918e-07, "loss": 0.0016, "step": 27270 }, { "epoch": 19.8, "grad_norm": 0.09723830223083496, "learning_rate": 5.244896312434439e-07, "loss": 0.0037, "step": 27280 }, { "epoch": 19.81, "grad_norm": 3.505588392727077e-05, "learning_rate": 5.043169531186961e-07, "loss": 0.0043, "step": 27290 }, { "epoch": 19.82, "grad_norm": 0.13812561333179474, "learning_rate": 4.841442749939482e-07, "loss": 0.0048, "step": 27300 }, { "epoch": 19.83, "grad_norm": 0.0003846702165901661, "learning_rate": 4.6397159686920034e-07, "loss": 0.0018, "step": 27310 }, { "epoch": 19.83, "grad_norm": 0.0003796774835791439, "learning_rate": 4.4379891874445253e-07, "loss": 0.0038, "step": 27320 }, { "epoch": 19.84, "grad_norm": 0.026537004858255386, "learning_rate": 4.2362624061970466e-07, "loss": 0.0026, "step": 27330 }, { "epoch": 19.85, "grad_norm": 0.10081913322210312, "learning_rate": 4.0345356249495685e-07, "loss": 0.0022, "step": 27340 }, { "epoch": 19.85, "grad_norm": 0.13242417573928833, "learning_rate": 3.8328088437020904e-07, "loss": 0.007, "step": 27350 }, { "epoch": 19.86, "grad_norm": 0.0003329771861899644, "learning_rate": 3.631082062454612e-07, "loss": 0.0, "step": 27360 }, { "epoch": 19.87, "grad_norm": 0.00014159978309180588, "learning_rate": 3.429355281207133e-07, "loss": 0.0059, "step": 27370 }, { "epoch": 19.88, "grad_norm": 0.0008457335061393678, "learning_rate": 3.2276284999596545e-07, "loss": 0.0028, "step": 27380 }, { "epoch": 19.88, "grad_norm": 0.14093543589115143, "learning_rate": 3.0259017187121764e-07, "loss": 0.0057, "step": 27390 }, { "epoch": 19.89, "grad_norm": 0.0995154157280922, "learning_rate": 2.8241749374646983e-07, "loss": 0.0042, "step": 27400 }, { "epoch": 19.9, "grad_norm": 1.3960028809378855e-05, "learning_rate": 2.6224481562172197e-07, "loss": 0.006, "step": 27410 }, { "epoch": 19.91, "grad_norm": 0.00027196883456781507, "learning_rate": 2.420721374969741e-07, "loss": 0.004, "step": 27420 }, { "epoch": 19.91, "grad_norm": 9.932199463946745e-05, "learning_rate": 2.2189945937222626e-07, "loss": 0.0064, "step": 27430 }, { "epoch": 19.92, "grad_norm": 0.0010937968036159873, "learning_rate": 2.0172678124747843e-07, "loss": 0.0028, "step": 27440 }, { "epoch": 19.93, "grad_norm": 2.9746237487415783e-05, "learning_rate": 1.815541031227306e-07, "loss": 0.0009, "step": 27450 }, { "epoch": 19.93, "grad_norm": 0.0013092844747006893, "learning_rate": 1.6138142499798273e-07, "loss": 0.0007, "step": 27460 }, { "epoch": 19.94, "grad_norm": 0.00010451417620060965, "learning_rate": 1.4120874687323491e-07, "loss": 0.0011, "step": 27470 }, { "epoch": 19.95, "grad_norm": 3.361936614965089e-05, "learning_rate": 1.2103606874848705e-07, "loss": 0.003, "step": 27480 }, { "epoch": 19.96, "grad_norm": 0.0003154365695081651, "learning_rate": 1.0086339062373921e-07, "loss": 0.0029, "step": 27490 }, { "epoch": 19.96, "grad_norm": 0.0006699699442833662, "learning_rate": 8.069071249899136e-08, "loss": 0.001, "step": 27500 }, { "epoch": 19.97, "grad_norm": 0.00039897230453789234, "learning_rate": 6.051803437424353e-08, "loss": 0.0052, "step": 27510 }, { "epoch": 19.98, "grad_norm": 0.0004209213948342949, "learning_rate": 4.034535624949568e-08, "loss": 0.0043, "step": 27520 }, { "epoch": 19.99, "grad_norm": 0.10861359536647797, "learning_rate": 2.017267812474784e-08, "loss": 0.0044, "step": 27530 }, { "epoch": 19.99, "grad_norm": 0.0003400925197638571, "learning_rate": 0.0, "loss": 0.0023, "step": 27540 }, { "epoch": 19.99, "eval_accuracy": 0.99837772836593, "eval_f1": 0.9962540929927963, "eval_loss": 0.003140063723549247, "eval_precision": 0.9998422630001578, "eval_recall": 0.9926915848820212, "eval_roc_auc": 0.9999876949396973, "eval_runtime": 387.381, "eval_samples_per_second": 227.549, "eval_steps_per_second": 14.224, "step": 27540 }, { "epoch": 19.99, "step": 27540, "total_flos": 4.380450478142158e+19, "train_loss": 0.029798476457713677, "train_runtime": 23573.1101, "train_samples_per_second": 74.787, "train_steps_per_second": 1.168 } ], "logging_steps": 10, "max_steps": 27540, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4.380450478142158e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }