{ "best_metric": 12.103847094587513, "best_model_checkpoint": "./hviske-v3/checkpoint-11548", "epoch": 8.0, "eval_steps": 500, "global_step": 11548, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006927606511950121, "grad_norm": 12.292132377624512, "learning_rate": 1.0000000000000002e-06, "loss": 1.2402, "step": 1 }, { "epoch": 0.0013855213023900243, "grad_norm": 11.564336776733398, "learning_rate": 2.0000000000000003e-06, "loss": 1.1541, "step": 2 }, { "epoch": 0.0020782819535850364, "grad_norm": 9.734325408935547, "learning_rate": 3e-06, "loss": 1.0122, "step": 3 }, { "epoch": 0.0027710426047800486, "grad_norm": 7.753880977630615, "learning_rate": 4.000000000000001e-06, "loss": 0.8952, "step": 4 }, { "epoch": 0.0034638032559750607, "grad_norm": 6.528608798980713, "learning_rate": 5e-06, "loss": 0.7526, "step": 5 }, { "epoch": 0.004156563907170073, "grad_norm": 5.133320331573486, "learning_rate": 6e-06, "loss": 0.5005, "step": 6 }, { "epoch": 0.0048493245583650845, "grad_norm": 3.6035006046295166, "learning_rate": 7e-06, "loss": 0.4519, "step": 7 }, { "epoch": 0.005542085209560097, "grad_norm": 2.8495588302612305, "learning_rate": 8.000000000000001e-06, "loss": 0.3598, "step": 8 }, { "epoch": 0.006234845860755109, "grad_norm": 2.3552119731903076, "learning_rate": 9e-06, "loss": 0.337, "step": 9 }, { "epoch": 0.006927606511950121, "grad_norm": 2.3528480529785156, "learning_rate": 1e-05, "loss": 0.3892, "step": 10 }, { "epoch": 0.007620367163145133, "grad_norm": 2.1666362285614014, "learning_rate": 9.999306518723996e-06, "loss": 0.3178, "step": 11 }, { "epoch": 0.008313127814340146, "grad_norm": 1.9824934005737305, "learning_rate": 9.998613037447989e-06, "loss": 0.3137, "step": 12 }, { "epoch": 0.009005888465535157, "grad_norm": 1.8904988765716553, "learning_rate": 9.997919556171984e-06, "loss": 0.3366, "step": 13 }, { "epoch": 0.009698649116730169, "grad_norm": 1.7962043285369873, "learning_rate": 9.997226074895979e-06, "loss": 0.3415, "step": 14 }, { "epoch": 0.010391409767925183, "grad_norm": 1.5924854278564453, "learning_rate": 9.996532593619974e-06, "loss": 0.3083, "step": 15 }, { "epoch": 0.011084170419120194, "grad_norm": 1.5507971048355103, "learning_rate": 9.995839112343969e-06, "loss": 0.2547, "step": 16 }, { "epoch": 0.011776931070315206, "grad_norm": 1.625256061553955, "learning_rate": 9.995145631067962e-06, "loss": 0.3259, "step": 17 }, { "epoch": 0.012469691721510218, "grad_norm": 1.5093649625778198, "learning_rate": 9.994452149791957e-06, "loss": 0.2925, "step": 18 }, { "epoch": 0.013162452372705231, "grad_norm": 1.570859432220459, "learning_rate": 9.99375866851595e-06, "loss": 0.3171, "step": 19 }, { "epoch": 0.013855213023900243, "grad_norm": 1.4971448183059692, "learning_rate": 9.993065187239945e-06, "loss": 0.3069, "step": 20 }, { "epoch": 0.014547973675095255, "grad_norm": 1.3889166116714478, "learning_rate": 9.99237170596394e-06, "loss": 0.2657, "step": 21 }, { "epoch": 0.015240734326290266, "grad_norm": 1.4377930164337158, "learning_rate": 9.991678224687933e-06, "loss": 0.2713, "step": 22 }, { "epoch": 0.01593349497748528, "grad_norm": 1.4533603191375732, "learning_rate": 9.990984743411928e-06, "loss": 0.2843, "step": 23 }, { "epoch": 0.01662625562868029, "grad_norm": 1.4159753322601318, "learning_rate": 9.990291262135923e-06, "loss": 0.3001, "step": 24 }, { "epoch": 0.017319016279875303, "grad_norm": 1.3404206037521362, "learning_rate": 9.989597780859918e-06, "loss": 0.2786, "step": 25 }, { "epoch": 0.018011776931070315, "grad_norm": 1.5711417198181152, "learning_rate": 9.988904299583913e-06, "loss": 0.2985, "step": 26 }, { "epoch": 0.018704537582265326, "grad_norm": 1.3879624605178833, "learning_rate": 9.988210818307906e-06, "loss": 0.2675, "step": 27 }, { "epoch": 0.019397298233460338, "grad_norm": 1.4611817598342896, "learning_rate": 9.987517337031901e-06, "loss": 0.2618, "step": 28 }, { "epoch": 0.02009005888465535, "grad_norm": 1.5529396533966064, "learning_rate": 9.986823855755894e-06, "loss": 0.2912, "step": 29 }, { "epoch": 0.020782819535850365, "grad_norm": 1.4379888772964478, "learning_rate": 9.98613037447989e-06, "loss": 0.2967, "step": 30 }, { "epoch": 0.021475580187045377, "grad_norm": 1.515025019645691, "learning_rate": 9.985436893203884e-06, "loss": 0.3046, "step": 31 }, { "epoch": 0.02216834083824039, "grad_norm": 1.5616956949234009, "learning_rate": 9.98474341192788e-06, "loss": 0.2605, "step": 32 }, { "epoch": 0.0228611014894354, "grad_norm": 1.435473084449768, "learning_rate": 9.984049930651874e-06, "loss": 0.2748, "step": 33 }, { "epoch": 0.023553862140630412, "grad_norm": 1.5559055805206299, "learning_rate": 9.983356449375867e-06, "loss": 0.2775, "step": 34 }, { "epoch": 0.024246622791825424, "grad_norm": 1.3089165687561035, "learning_rate": 9.982662968099862e-06, "loss": 0.2415, "step": 35 }, { "epoch": 0.024939383443020435, "grad_norm": 1.2515681982040405, "learning_rate": 9.981969486823857e-06, "loss": 0.2404, "step": 36 }, { "epoch": 0.025632144094215447, "grad_norm": 1.357074499130249, "learning_rate": 9.98127600554785e-06, "loss": 0.2519, "step": 37 }, { "epoch": 0.026324904745410462, "grad_norm": 1.4473412036895752, "learning_rate": 9.980582524271845e-06, "loss": 0.2975, "step": 38 }, { "epoch": 0.027017665396605474, "grad_norm": 1.442353367805481, "learning_rate": 9.97988904299584e-06, "loss": 0.2451, "step": 39 }, { "epoch": 0.027710426047800486, "grad_norm": 1.4204617738723755, "learning_rate": 9.979195561719834e-06, "loss": 0.2315, "step": 40 }, { "epoch": 0.028403186698995497, "grad_norm": 1.443942904472351, "learning_rate": 9.978502080443829e-06, "loss": 0.2895, "step": 41 }, { "epoch": 0.02909594735019051, "grad_norm": 1.2991156578063965, "learning_rate": 9.977808599167823e-06, "loss": 0.2266, "step": 42 }, { "epoch": 0.02978870800138552, "grad_norm": 1.3881120681762695, "learning_rate": 9.977115117891818e-06, "loss": 0.2772, "step": 43 }, { "epoch": 0.030481468652580532, "grad_norm": 1.3863579034805298, "learning_rate": 9.976421636615812e-06, "loss": 0.2694, "step": 44 }, { "epoch": 0.031174229303775544, "grad_norm": 1.4252607822418213, "learning_rate": 9.975728155339807e-06, "loss": 0.2603, "step": 45 }, { "epoch": 0.03186698995497056, "grad_norm": 1.3916229009628296, "learning_rate": 9.975034674063801e-06, "loss": 0.2634, "step": 46 }, { "epoch": 0.03255975060616557, "grad_norm": 1.3882328271865845, "learning_rate": 9.974341192787795e-06, "loss": 0.2361, "step": 47 }, { "epoch": 0.03325251125736058, "grad_norm": 1.3797417879104614, "learning_rate": 9.97364771151179e-06, "loss": 0.2611, "step": 48 }, { "epoch": 0.03394527190855559, "grad_norm": 1.3497843742370605, "learning_rate": 9.972954230235785e-06, "loss": 0.256, "step": 49 }, { "epoch": 0.034638032559750606, "grad_norm": 1.432814359664917, "learning_rate": 9.97226074895978e-06, "loss": 0.1958, "step": 50 }, { "epoch": 0.03533079321094562, "grad_norm": 1.3722063302993774, "learning_rate": 9.971567267683774e-06, "loss": 0.2559, "step": 51 }, { "epoch": 0.03602355386214063, "grad_norm": 1.3759533166885376, "learning_rate": 9.970873786407768e-06, "loss": 0.2528, "step": 52 }, { "epoch": 0.036716314513335645, "grad_norm": 1.4539951086044312, "learning_rate": 9.970180305131763e-06, "loss": 0.2481, "step": 53 }, { "epoch": 0.03740907516453065, "grad_norm": 1.3330899477005005, "learning_rate": 9.969486823855756e-06, "loss": 0.2212, "step": 54 }, { "epoch": 0.03810183581572567, "grad_norm": 1.3839563131332397, "learning_rate": 9.96879334257975e-06, "loss": 0.2538, "step": 55 }, { "epoch": 0.038794596466920676, "grad_norm": 1.4515337944030762, "learning_rate": 9.968099861303746e-06, "loss": 0.2505, "step": 56 }, { "epoch": 0.03948735711811569, "grad_norm": 1.3861531019210815, "learning_rate": 9.96740638002774e-06, "loss": 0.2625, "step": 57 }, { "epoch": 0.0401801177693107, "grad_norm": 1.364999771118164, "learning_rate": 9.966712898751736e-06, "loss": 0.2652, "step": 58 }, { "epoch": 0.040872878420505715, "grad_norm": 1.4211515188217163, "learning_rate": 9.966019417475729e-06, "loss": 0.2569, "step": 59 }, { "epoch": 0.04156563907170073, "grad_norm": 1.3074867725372314, "learning_rate": 9.965325936199724e-06, "loss": 0.2365, "step": 60 }, { "epoch": 0.04225839972289574, "grad_norm": 1.4820562601089478, "learning_rate": 9.964632454923719e-06, "loss": 0.2756, "step": 61 }, { "epoch": 0.042951160374090754, "grad_norm": 1.3363418579101562, "learning_rate": 9.963938973647712e-06, "loss": 0.2178, "step": 62 }, { "epoch": 0.04364392102528576, "grad_norm": 1.466212272644043, "learning_rate": 9.963245492371707e-06, "loss": 0.2638, "step": 63 }, { "epoch": 0.04433668167648078, "grad_norm": 1.5230470895767212, "learning_rate": 9.9625520110957e-06, "loss": 0.284, "step": 64 }, { "epoch": 0.045029442327675785, "grad_norm": 1.2839550971984863, "learning_rate": 9.961858529819695e-06, "loss": 0.2314, "step": 65 }, { "epoch": 0.0457222029788708, "grad_norm": 1.468959927558899, "learning_rate": 9.96116504854369e-06, "loss": 0.2499, "step": 66 }, { "epoch": 0.046414963630065816, "grad_norm": 1.2798144817352295, "learning_rate": 9.960471567267685e-06, "loss": 0.2303, "step": 67 }, { "epoch": 0.047107724281260824, "grad_norm": 1.324418544769287, "learning_rate": 9.95977808599168e-06, "loss": 0.214, "step": 68 }, { "epoch": 0.04780048493245584, "grad_norm": 1.3903834819793701, "learning_rate": 9.959084604715673e-06, "loss": 0.2675, "step": 69 }, { "epoch": 0.04849324558365085, "grad_norm": 1.4964789152145386, "learning_rate": 9.958391123439668e-06, "loss": 0.2548, "step": 70 }, { "epoch": 0.04918600623484586, "grad_norm": 1.252150058746338, "learning_rate": 9.957697642163663e-06, "loss": 0.2366, "step": 71 }, { "epoch": 0.04987876688604087, "grad_norm": 1.445634126663208, "learning_rate": 9.957004160887656e-06, "loss": 0.2491, "step": 72 }, { "epoch": 0.050571527537235886, "grad_norm": 1.4249677658081055, "learning_rate": 9.956310679611651e-06, "loss": 0.2385, "step": 73 }, { "epoch": 0.051264288188430894, "grad_norm": 1.3850736618041992, "learning_rate": 9.955617198335646e-06, "loss": 0.2443, "step": 74 }, { "epoch": 0.05195704883962591, "grad_norm": 1.3256713151931763, "learning_rate": 9.954923717059641e-06, "loss": 0.2381, "step": 75 }, { "epoch": 0.052649809490820924, "grad_norm": 1.3021267652511597, "learning_rate": 9.954230235783636e-06, "loss": 0.2498, "step": 76 }, { "epoch": 0.05334257014201593, "grad_norm": 1.2603353261947632, "learning_rate": 9.95353675450763e-06, "loss": 0.2668, "step": 77 }, { "epoch": 0.05403533079321095, "grad_norm": 1.2531903982162476, "learning_rate": 9.952843273231624e-06, "loss": 0.2266, "step": 78 }, { "epoch": 0.054728091444405956, "grad_norm": 1.3433337211608887, "learning_rate": 9.952149791955617e-06, "loss": 0.2532, "step": 79 }, { "epoch": 0.05542085209560097, "grad_norm": 1.2393718957901, "learning_rate": 9.951456310679612e-06, "loss": 0.237, "step": 80 }, { "epoch": 0.05611361274679598, "grad_norm": 1.3576997518539429, "learning_rate": 9.950762829403607e-06, "loss": 0.2519, "step": 81 }, { "epoch": 0.056806373397990995, "grad_norm": 1.3664438724517822, "learning_rate": 9.9500693481276e-06, "loss": 0.25, "step": 82 }, { "epoch": 0.057499134049186, "grad_norm": 1.4981049299240112, "learning_rate": 9.949375866851595e-06, "loss": 0.2901, "step": 83 }, { "epoch": 0.05819189470038102, "grad_norm": 1.4166090488433838, "learning_rate": 9.94868238557559e-06, "loss": 0.2465, "step": 84 }, { "epoch": 0.05888465535157603, "grad_norm": 1.2040208578109741, "learning_rate": 9.947988904299585e-06, "loss": 0.1856, "step": 85 }, { "epoch": 0.05957741600277104, "grad_norm": 1.189120888710022, "learning_rate": 9.94729542302358e-06, "loss": 0.2086, "step": 86 }, { "epoch": 0.06027017665396606, "grad_norm": 1.3155075311660767, "learning_rate": 9.946601941747573e-06, "loss": 0.2299, "step": 87 }, { "epoch": 0.060962937305161065, "grad_norm": 1.4105749130249023, "learning_rate": 9.945908460471568e-06, "loss": 0.267, "step": 88 }, { "epoch": 0.06165569795635608, "grad_norm": 1.3087950944900513, "learning_rate": 9.945214979195562e-06, "loss": 0.2267, "step": 89 }, { "epoch": 0.06234845860755109, "grad_norm": 1.3032172918319702, "learning_rate": 9.944521497919557e-06, "loss": 0.2376, "step": 90 }, { "epoch": 0.0630412192587461, "grad_norm": 1.2708215713500977, "learning_rate": 9.943828016643551e-06, "loss": 0.2141, "step": 91 }, { "epoch": 0.06373397990994112, "grad_norm": 1.1809252500534058, "learning_rate": 9.943134535367546e-06, "loss": 0.2102, "step": 92 }, { "epoch": 0.06442674056113613, "grad_norm": 1.3737300634384155, "learning_rate": 9.942441054091541e-06, "loss": 0.2512, "step": 93 }, { "epoch": 0.06511950121233114, "grad_norm": 1.261559009552002, "learning_rate": 9.941747572815535e-06, "loss": 0.218, "step": 94 }, { "epoch": 0.06581226186352615, "grad_norm": 1.394752860069275, "learning_rate": 9.94105409153953e-06, "loss": 0.2524, "step": 95 }, { "epoch": 0.06650502251472117, "grad_norm": 1.3859493732452393, "learning_rate": 9.940360610263524e-06, "loss": 0.1946, "step": 96 }, { "epoch": 0.06719778316591618, "grad_norm": 1.346633791923523, "learning_rate": 9.939667128987518e-06, "loss": 0.2555, "step": 97 }, { "epoch": 0.06789054381711118, "grad_norm": 1.2180049419403076, "learning_rate": 9.938973647711513e-06, "loss": 0.2117, "step": 98 }, { "epoch": 0.0685833044683062, "grad_norm": 1.2323360443115234, "learning_rate": 9.938280166435506e-06, "loss": 0.2402, "step": 99 }, { "epoch": 0.06927606511950121, "grad_norm": 1.1962711811065674, "learning_rate": 9.9375866851595e-06, "loss": 0.2081, "step": 100 }, { "epoch": 0.06996882577069623, "grad_norm": 1.3550431728363037, "learning_rate": 9.936893203883496e-06, "loss": 0.24, "step": 101 }, { "epoch": 0.07066158642189124, "grad_norm": 1.2796891927719116, "learning_rate": 9.93619972260749e-06, "loss": 0.2375, "step": 102 }, { "epoch": 0.07135434707308624, "grad_norm": 1.254270076751709, "learning_rate": 9.935506241331486e-06, "loss": 0.2052, "step": 103 }, { "epoch": 0.07204710772428126, "grad_norm": 1.1684892177581787, "learning_rate": 9.934812760055479e-06, "loss": 0.1916, "step": 104 }, { "epoch": 0.07273986837547627, "grad_norm": 1.381418228149414, "learning_rate": 9.934119278779474e-06, "loss": 0.2708, "step": 105 }, { "epoch": 0.07343262902667129, "grad_norm": 1.2438158988952637, "learning_rate": 9.933425797503469e-06, "loss": 0.2002, "step": 106 }, { "epoch": 0.07412538967786629, "grad_norm": 1.244842529296875, "learning_rate": 9.932732316227462e-06, "loss": 0.2351, "step": 107 }, { "epoch": 0.0748181503290613, "grad_norm": 1.1751757860183716, "learning_rate": 9.932038834951457e-06, "loss": 0.2049, "step": 108 }, { "epoch": 0.07551091098025632, "grad_norm": 1.2962253093719482, "learning_rate": 9.931345353675452e-06, "loss": 0.2092, "step": 109 }, { "epoch": 0.07620367163145134, "grad_norm": 1.2747116088867188, "learning_rate": 9.930651872399447e-06, "loss": 0.2346, "step": 110 }, { "epoch": 0.07689643228264635, "grad_norm": 1.279923915863037, "learning_rate": 9.929958391123442e-06, "loss": 0.2346, "step": 111 }, { "epoch": 0.07758919293384135, "grad_norm": 1.406819224357605, "learning_rate": 9.929264909847435e-06, "loss": 0.2081, "step": 112 }, { "epoch": 0.07828195358503637, "grad_norm": 1.172568678855896, "learning_rate": 9.92857142857143e-06, "loss": 0.1966, "step": 113 }, { "epoch": 0.07897471423623138, "grad_norm": 1.2985316514968872, "learning_rate": 9.927877947295423e-06, "loss": 0.2204, "step": 114 }, { "epoch": 0.0796674748874264, "grad_norm": 1.4315857887268066, "learning_rate": 9.927184466019418e-06, "loss": 0.2115, "step": 115 }, { "epoch": 0.0803602355386214, "grad_norm": 1.6077250242233276, "learning_rate": 9.926490984743413e-06, "loss": 0.2492, "step": 116 }, { "epoch": 0.08105299618981641, "grad_norm": 1.518119215965271, "learning_rate": 9.925797503467406e-06, "loss": 0.2278, "step": 117 }, { "epoch": 0.08174575684101143, "grad_norm": 1.2023013830184937, "learning_rate": 9.925104022191401e-06, "loss": 0.2105, "step": 118 }, { "epoch": 0.08243851749220645, "grad_norm": 1.2681522369384766, "learning_rate": 9.924410540915396e-06, "loss": 0.2107, "step": 119 }, { "epoch": 0.08313127814340146, "grad_norm": 1.2564748525619507, "learning_rate": 9.923717059639391e-06, "loss": 0.2202, "step": 120 }, { "epoch": 0.08382403879459646, "grad_norm": 1.2870888710021973, "learning_rate": 9.923023578363386e-06, "loss": 0.2215, "step": 121 }, { "epoch": 0.08451679944579148, "grad_norm": 1.364901065826416, "learning_rate": 9.922330097087379e-06, "loss": 0.2106, "step": 122 }, { "epoch": 0.08520956009698649, "grad_norm": 1.326931118965149, "learning_rate": 9.921636615811374e-06, "loss": 0.2641, "step": 123 }, { "epoch": 0.08590232074818151, "grad_norm": 1.2108707427978516, "learning_rate": 9.920943134535367e-06, "loss": 0.2265, "step": 124 }, { "epoch": 0.08659508139937652, "grad_norm": 1.4389864206314087, "learning_rate": 9.920249653259362e-06, "loss": 0.2426, "step": 125 }, { "epoch": 0.08728784205057152, "grad_norm": 1.3699928522109985, "learning_rate": 9.919556171983357e-06, "loss": 0.2315, "step": 126 }, { "epoch": 0.08798060270176654, "grad_norm": 1.321111798286438, "learning_rate": 9.918862690707352e-06, "loss": 0.2347, "step": 127 }, { "epoch": 0.08867336335296155, "grad_norm": 1.1944401264190674, "learning_rate": 9.918169209431347e-06, "loss": 0.2258, "step": 128 }, { "epoch": 0.08936612400415657, "grad_norm": 1.245202898979187, "learning_rate": 9.91747572815534e-06, "loss": 0.2356, "step": 129 }, { "epoch": 0.09005888465535157, "grad_norm": 1.2784806489944458, "learning_rate": 9.916782246879335e-06, "loss": 0.2452, "step": 130 }, { "epoch": 0.09075164530654659, "grad_norm": 1.1236475706100464, "learning_rate": 9.91608876560333e-06, "loss": 0.188, "step": 131 }, { "epoch": 0.0914444059577416, "grad_norm": 1.326741337776184, "learning_rate": 9.915395284327323e-06, "loss": 0.2342, "step": 132 }, { "epoch": 0.09213716660893662, "grad_norm": 1.403367280960083, "learning_rate": 9.914701803051318e-06, "loss": 0.2539, "step": 133 }, { "epoch": 0.09282992726013163, "grad_norm": 1.344679355621338, "learning_rate": 9.914008321775313e-06, "loss": 0.2288, "step": 134 }, { "epoch": 0.09352268791132663, "grad_norm": 1.3344292640686035, "learning_rate": 9.913314840499308e-06, "loss": 0.2419, "step": 135 }, { "epoch": 0.09421544856252165, "grad_norm": 1.2694120407104492, "learning_rate": 9.912621359223301e-06, "loss": 0.2186, "step": 136 }, { "epoch": 0.09490820921371666, "grad_norm": 1.0931838750839233, "learning_rate": 9.911927877947296e-06, "loss": 0.1731, "step": 137 }, { "epoch": 0.09560096986491168, "grad_norm": 1.4262518882751465, "learning_rate": 9.911234396671291e-06, "loss": 0.2536, "step": 138 }, { "epoch": 0.09629373051610668, "grad_norm": 1.3521032333374023, "learning_rate": 9.910540915395285e-06, "loss": 0.2245, "step": 139 }, { "epoch": 0.0969864911673017, "grad_norm": 1.4264510869979858, "learning_rate": 9.90984743411928e-06, "loss": 0.2639, "step": 140 }, { "epoch": 0.09767925181849671, "grad_norm": 1.265846610069275, "learning_rate": 9.909153952843274e-06, "loss": 0.1989, "step": 141 }, { "epoch": 0.09837201246969172, "grad_norm": 1.3544530868530273, "learning_rate": 9.908460471567268e-06, "loss": 0.2206, "step": 142 }, { "epoch": 0.09906477312088674, "grad_norm": 1.1606234312057495, "learning_rate": 9.907766990291263e-06, "loss": 0.1849, "step": 143 }, { "epoch": 0.09975753377208174, "grad_norm": 1.3297104835510254, "learning_rate": 9.907073509015258e-06, "loss": 0.2128, "step": 144 }, { "epoch": 0.10045029442327676, "grad_norm": 1.2785829305648804, "learning_rate": 9.906380027739252e-06, "loss": 0.1987, "step": 145 }, { "epoch": 0.10114305507447177, "grad_norm": 1.2113291025161743, "learning_rate": 9.905686546463247e-06, "loss": 0.2208, "step": 146 }, { "epoch": 0.10183581572566679, "grad_norm": 1.3514881134033203, "learning_rate": 9.90499306518724e-06, "loss": 0.2283, "step": 147 }, { "epoch": 0.10252857637686179, "grad_norm": 1.2799649238586426, "learning_rate": 9.904299583911236e-06, "loss": 0.2348, "step": 148 }, { "epoch": 0.1032213370280568, "grad_norm": 1.2758800983428955, "learning_rate": 9.903606102635229e-06, "loss": 0.2051, "step": 149 }, { "epoch": 0.10391409767925182, "grad_norm": 1.4284158945083618, "learning_rate": 9.902912621359224e-06, "loss": 0.2407, "step": 150 }, { "epoch": 0.10460685833044683, "grad_norm": 1.3322699069976807, "learning_rate": 9.902219140083219e-06, "loss": 0.2362, "step": 151 }, { "epoch": 0.10529961898164185, "grad_norm": 1.2086516618728638, "learning_rate": 9.901525658807214e-06, "loss": 0.1891, "step": 152 }, { "epoch": 0.10599237963283685, "grad_norm": 1.2374615669250488, "learning_rate": 9.900832177531209e-06, "loss": 0.2212, "step": 153 }, { "epoch": 0.10668514028403187, "grad_norm": 1.2254961729049683, "learning_rate": 9.900138696255202e-06, "loss": 0.193, "step": 154 }, { "epoch": 0.10737790093522688, "grad_norm": 1.230727195739746, "learning_rate": 9.899445214979197e-06, "loss": 0.2077, "step": 155 }, { "epoch": 0.1080706615864219, "grad_norm": 1.1526367664337158, "learning_rate": 9.898751733703192e-06, "loss": 0.1706, "step": 156 }, { "epoch": 0.1087634222376169, "grad_norm": 1.2905890941619873, "learning_rate": 9.898058252427185e-06, "loss": 0.2072, "step": 157 }, { "epoch": 0.10945618288881191, "grad_norm": 1.2018944025039673, "learning_rate": 9.89736477115118e-06, "loss": 0.2084, "step": 158 }, { "epoch": 0.11014894354000693, "grad_norm": 1.2835495471954346, "learning_rate": 9.896671289875173e-06, "loss": 0.2156, "step": 159 }, { "epoch": 0.11084170419120194, "grad_norm": 1.24733304977417, "learning_rate": 9.895977808599168e-06, "loss": 0.2158, "step": 160 }, { "epoch": 0.11153446484239696, "grad_norm": 1.1799983978271484, "learning_rate": 9.895284327323163e-06, "loss": 0.2064, "step": 161 }, { "epoch": 0.11222722549359196, "grad_norm": 1.3932280540466309, "learning_rate": 9.894590846047158e-06, "loss": 0.2286, "step": 162 }, { "epoch": 0.11291998614478697, "grad_norm": 1.2786810398101807, "learning_rate": 9.893897364771153e-06, "loss": 0.1983, "step": 163 }, { "epoch": 0.11361274679598199, "grad_norm": 1.2713406085968018, "learning_rate": 9.893203883495146e-06, "loss": 0.2441, "step": 164 }, { "epoch": 0.114305507447177, "grad_norm": 1.3331621885299683, "learning_rate": 9.892510402219141e-06, "loss": 0.2472, "step": 165 }, { "epoch": 0.114998268098372, "grad_norm": 1.3456852436065674, "learning_rate": 9.891816920943136e-06, "loss": 0.2302, "step": 166 }, { "epoch": 0.11569102874956702, "grad_norm": 1.2964712381362915, "learning_rate": 9.891123439667129e-06, "loss": 0.2219, "step": 167 }, { "epoch": 0.11638378940076204, "grad_norm": 1.3069645166397095, "learning_rate": 9.890429958391124e-06, "loss": 0.2082, "step": 168 }, { "epoch": 0.11707655005195705, "grad_norm": 1.1544204950332642, "learning_rate": 9.889736477115119e-06, "loss": 0.1808, "step": 169 }, { "epoch": 0.11776931070315207, "grad_norm": 1.2361173629760742, "learning_rate": 9.889042995839114e-06, "loss": 0.2082, "step": 170 }, { "epoch": 0.11846207135434707, "grad_norm": 1.2170089483261108, "learning_rate": 9.888349514563109e-06, "loss": 0.2217, "step": 171 }, { "epoch": 0.11915483200554208, "grad_norm": 1.2207200527191162, "learning_rate": 9.887656033287102e-06, "loss": 0.2112, "step": 172 }, { "epoch": 0.1198475926567371, "grad_norm": 1.2350168228149414, "learning_rate": 9.886962552011097e-06, "loss": 0.2186, "step": 173 }, { "epoch": 0.12054035330793211, "grad_norm": 1.233114242553711, "learning_rate": 9.88626907073509e-06, "loss": 0.2126, "step": 174 }, { "epoch": 0.12123311395912713, "grad_norm": 1.2821439504623413, "learning_rate": 9.885575589459085e-06, "loss": 0.2111, "step": 175 }, { "epoch": 0.12192587461032213, "grad_norm": 1.2624083757400513, "learning_rate": 9.88488210818308e-06, "loss": 0.2094, "step": 176 }, { "epoch": 0.12261863526151714, "grad_norm": 1.2297890186309814, "learning_rate": 9.884188626907073e-06, "loss": 0.2192, "step": 177 }, { "epoch": 0.12331139591271216, "grad_norm": 1.2710490226745605, "learning_rate": 9.883495145631068e-06, "loss": 0.1832, "step": 178 }, { "epoch": 0.12400415656390718, "grad_norm": 1.1201629638671875, "learning_rate": 9.882801664355063e-06, "loss": 0.1738, "step": 179 }, { "epoch": 0.12469691721510218, "grad_norm": 1.30121648311615, "learning_rate": 9.882108183079058e-06, "loss": 0.2266, "step": 180 }, { "epoch": 0.1253896778662972, "grad_norm": 1.3483548164367676, "learning_rate": 9.881414701803053e-06, "loss": 0.2142, "step": 181 }, { "epoch": 0.1260824385174922, "grad_norm": 1.202298879623413, "learning_rate": 9.880721220527046e-06, "loss": 0.1916, "step": 182 }, { "epoch": 0.1267751991686872, "grad_norm": 1.410788893699646, "learning_rate": 9.880027739251041e-06, "loss": 0.2421, "step": 183 }, { "epoch": 0.12746795981988224, "grad_norm": 1.306676983833313, "learning_rate": 9.879334257975035e-06, "loss": 0.2341, "step": 184 }, { "epoch": 0.12816072047107724, "grad_norm": 1.2322577238082886, "learning_rate": 9.87864077669903e-06, "loss": 0.1775, "step": 185 }, { "epoch": 0.12885348112227227, "grad_norm": 1.1031079292297363, "learning_rate": 9.877947295423024e-06, "loss": 0.1945, "step": 186 }, { "epoch": 0.12954624177346727, "grad_norm": 1.3930292129516602, "learning_rate": 9.87725381414702e-06, "loss": 0.2425, "step": 187 }, { "epoch": 0.13023900242466227, "grad_norm": 1.2810267210006714, "learning_rate": 9.876560332871014e-06, "loss": 0.2209, "step": 188 }, { "epoch": 0.1309317630758573, "grad_norm": 1.2307170629501343, "learning_rate": 9.875866851595008e-06, "loss": 0.202, "step": 189 }, { "epoch": 0.1316245237270523, "grad_norm": 1.3543944358825684, "learning_rate": 9.875173370319002e-06, "loss": 0.2147, "step": 190 }, { "epoch": 0.1323172843782473, "grad_norm": 1.176788330078125, "learning_rate": 9.874479889042997e-06, "loss": 0.1863, "step": 191 }, { "epoch": 0.13301004502944233, "grad_norm": 1.282935380935669, "learning_rate": 9.87378640776699e-06, "loss": 0.196, "step": 192 }, { "epoch": 0.13370280568063733, "grad_norm": 1.364748477935791, "learning_rate": 9.873092926490986e-06, "loss": 0.2352, "step": 193 }, { "epoch": 0.13439556633183236, "grad_norm": 1.2242869138717651, "learning_rate": 9.872399445214979e-06, "loss": 0.2178, "step": 194 }, { "epoch": 0.13508832698302736, "grad_norm": 1.2392072677612305, "learning_rate": 9.871705963938974e-06, "loss": 0.2012, "step": 195 }, { "epoch": 0.13578108763422236, "grad_norm": 1.2447081804275513, "learning_rate": 9.871012482662969e-06, "loss": 0.198, "step": 196 }, { "epoch": 0.1364738482854174, "grad_norm": 1.3111966848373413, "learning_rate": 9.870319001386964e-06, "loss": 0.2187, "step": 197 }, { "epoch": 0.1371666089366124, "grad_norm": 1.3184764385223389, "learning_rate": 9.869625520110959e-06, "loss": 0.2719, "step": 198 }, { "epoch": 0.13785936958780742, "grad_norm": 1.1946929693222046, "learning_rate": 9.868932038834952e-06, "loss": 0.1987, "step": 199 }, { "epoch": 0.13855213023900242, "grad_norm": 1.234924554824829, "learning_rate": 9.868238557558947e-06, "loss": 0.2382, "step": 200 }, { "epoch": 0.13924489089019743, "grad_norm": 1.2631289958953857, "learning_rate": 9.867545076282942e-06, "loss": 0.23, "step": 201 }, { "epoch": 0.13993765154139245, "grad_norm": 1.2285739183425903, "learning_rate": 9.866851595006935e-06, "loss": 0.1768, "step": 202 }, { "epoch": 0.14063041219258746, "grad_norm": 1.2478169202804565, "learning_rate": 9.86615811373093e-06, "loss": 0.1902, "step": 203 }, { "epoch": 0.14132317284378249, "grad_norm": 1.2999858856201172, "learning_rate": 9.865464632454925e-06, "loss": 0.2109, "step": 204 }, { "epoch": 0.1420159334949775, "grad_norm": 1.2435208559036255, "learning_rate": 9.86477115117892e-06, "loss": 0.1972, "step": 205 }, { "epoch": 0.1427086941461725, "grad_norm": 1.2937616109848022, "learning_rate": 9.864077669902915e-06, "loss": 0.2116, "step": 206 }, { "epoch": 0.14340145479736752, "grad_norm": 1.2655960321426392, "learning_rate": 9.863384188626908e-06, "loss": 0.2176, "step": 207 }, { "epoch": 0.14409421544856252, "grad_norm": 1.202903389930725, "learning_rate": 9.862690707350903e-06, "loss": 0.1913, "step": 208 }, { "epoch": 0.14478697609975755, "grad_norm": 1.2482086420059204, "learning_rate": 9.861997226074896e-06, "loss": 0.1854, "step": 209 }, { "epoch": 0.14547973675095255, "grad_norm": 1.2493541240692139, "learning_rate": 9.861303744798891e-06, "loss": 0.2055, "step": 210 }, { "epoch": 0.14617249740214755, "grad_norm": 1.0790982246398926, "learning_rate": 9.860610263522886e-06, "loss": 0.1891, "step": 211 }, { "epoch": 0.14686525805334258, "grad_norm": 1.2813910245895386, "learning_rate": 9.85991678224688e-06, "loss": 0.2208, "step": 212 }, { "epoch": 0.14755801870453758, "grad_norm": 1.2617616653442383, "learning_rate": 9.859223300970874e-06, "loss": 0.2139, "step": 213 }, { "epoch": 0.14825077935573258, "grad_norm": 1.3398065567016602, "learning_rate": 9.858529819694869e-06, "loss": 0.1873, "step": 214 }, { "epoch": 0.1489435400069276, "grad_norm": 1.2965143918991089, "learning_rate": 9.857836338418864e-06, "loss": 0.2068, "step": 215 }, { "epoch": 0.1496363006581226, "grad_norm": 1.24671471118927, "learning_rate": 9.857142857142859e-06, "loss": 0.2225, "step": 216 }, { "epoch": 0.15032906130931764, "grad_norm": 1.31780207157135, "learning_rate": 9.856449375866852e-06, "loss": 0.236, "step": 217 }, { "epoch": 0.15102182196051264, "grad_norm": 1.3348220586776733, "learning_rate": 9.855755894590847e-06, "loss": 0.228, "step": 218 }, { "epoch": 0.15171458261170764, "grad_norm": 1.3435728549957275, "learning_rate": 9.85506241331484e-06, "loss": 0.2072, "step": 219 }, { "epoch": 0.15240734326290267, "grad_norm": 1.2767689228057861, "learning_rate": 9.854368932038835e-06, "loss": 0.2147, "step": 220 }, { "epoch": 0.15310010391409767, "grad_norm": 1.2819961309432983, "learning_rate": 9.85367545076283e-06, "loss": 0.2037, "step": 221 }, { "epoch": 0.1537928645652927, "grad_norm": 1.2598323822021484, "learning_rate": 9.852981969486825e-06, "loss": 0.2072, "step": 222 }, { "epoch": 0.1544856252164877, "grad_norm": 1.3809995651245117, "learning_rate": 9.85228848821082e-06, "loss": 0.227, "step": 223 }, { "epoch": 0.1551783858676827, "grad_norm": 1.1424462795257568, "learning_rate": 9.851595006934813e-06, "loss": 0.2237, "step": 224 }, { "epoch": 0.15587114651887773, "grad_norm": 1.2016152143478394, "learning_rate": 9.850901525658808e-06, "loss": 0.187, "step": 225 }, { "epoch": 0.15656390717007274, "grad_norm": 1.221073865890503, "learning_rate": 9.850208044382803e-06, "loss": 0.1899, "step": 226 }, { "epoch": 0.15725666782126776, "grad_norm": 1.2531520128250122, "learning_rate": 9.849514563106796e-06, "loss": 0.2314, "step": 227 }, { "epoch": 0.15794942847246277, "grad_norm": 1.2226425409317017, "learning_rate": 9.848821081830791e-06, "loss": 0.2108, "step": 228 }, { "epoch": 0.15864218912365777, "grad_norm": 1.1877517700195312, "learning_rate": 9.848127600554786e-06, "loss": 0.225, "step": 229 }, { "epoch": 0.1593349497748528, "grad_norm": 1.371266484260559, "learning_rate": 9.847434119278781e-06, "loss": 0.2285, "step": 230 }, { "epoch": 0.1600277104260478, "grad_norm": 1.1313170194625854, "learning_rate": 9.846740638002776e-06, "loss": 0.2077, "step": 231 }, { "epoch": 0.1607204710772428, "grad_norm": 1.3221700191497803, "learning_rate": 9.84604715672677e-06, "loss": 0.2128, "step": 232 }, { "epoch": 0.16141323172843783, "grad_norm": 1.402754783630371, "learning_rate": 9.845353675450764e-06, "loss": 0.2218, "step": 233 }, { "epoch": 0.16210599237963283, "grad_norm": 1.1608260869979858, "learning_rate": 9.844660194174757e-06, "loss": 0.2163, "step": 234 }, { "epoch": 0.16279875303082786, "grad_norm": 1.2287039756774902, "learning_rate": 9.843966712898752e-06, "loss": 0.2145, "step": 235 }, { "epoch": 0.16349151368202286, "grad_norm": 1.31821608543396, "learning_rate": 9.843273231622747e-06, "loss": 0.1889, "step": 236 }, { "epoch": 0.16418427433321786, "grad_norm": 1.2445365190505981, "learning_rate": 9.84257975034674e-06, "loss": 0.2296, "step": 237 }, { "epoch": 0.1648770349844129, "grad_norm": 1.3389273881912231, "learning_rate": 9.841886269070736e-06, "loss": 0.2008, "step": 238 }, { "epoch": 0.1655697956356079, "grad_norm": 1.1889816522598267, "learning_rate": 9.84119278779473e-06, "loss": 0.2062, "step": 239 }, { "epoch": 0.16626255628680292, "grad_norm": 1.1592460870742798, "learning_rate": 9.840499306518725e-06, "loss": 0.1967, "step": 240 }, { "epoch": 0.16695531693799792, "grad_norm": 1.087691068649292, "learning_rate": 9.83980582524272e-06, "loss": 0.1709, "step": 241 }, { "epoch": 0.16764807758919292, "grad_norm": 1.0733919143676758, "learning_rate": 9.839112343966714e-06, "loss": 0.1641, "step": 242 }, { "epoch": 0.16834083824038795, "grad_norm": 1.19711172580719, "learning_rate": 9.838418862690708e-06, "loss": 0.2138, "step": 243 }, { "epoch": 0.16903359889158295, "grad_norm": 1.1129231452941895, "learning_rate": 9.837725381414702e-06, "loss": 0.1721, "step": 244 }, { "epoch": 0.16972635954277798, "grad_norm": 1.3185864686965942, "learning_rate": 9.837031900138697e-06, "loss": 0.2135, "step": 245 }, { "epoch": 0.17041912019397298, "grad_norm": 1.4742556810379028, "learning_rate": 9.836338418862692e-06, "loss": 0.2573, "step": 246 }, { "epoch": 0.17111188084516799, "grad_norm": 1.267116904258728, "learning_rate": 9.835644937586687e-06, "loss": 0.1905, "step": 247 }, { "epoch": 0.17180464149636301, "grad_norm": 1.193515419960022, "learning_rate": 9.834951456310681e-06, "loss": 0.1993, "step": 248 }, { "epoch": 0.17249740214755802, "grad_norm": 1.228421926498413, "learning_rate": 9.834257975034675e-06, "loss": 0.191, "step": 249 }, { "epoch": 0.17319016279875304, "grad_norm": 1.1218215227127075, "learning_rate": 9.83356449375867e-06, "loss": 0.196, "step": 250 }, { "epoch": 0.17388292344994805, "grad_norm": 1.4057866334915161, "learning_rate": 9.832871012482665e-06, "loss": 0.2423, "step": 251 }, { "epoch": 0.17457568410114305, "grad_norm": 1.2802729606628418, "learning_rate": 9.832177531206658e-06, "loss": 0.1895, "step": 252 }, { "epoch": 0.17526844475233808, "grad_norm": 1.1412217617034912, "learning_rate": 9.831484049930653e-06, "loss": 0.1842, "step": 253 }, { "epoch": 0.17596120540353308, "grad_norm": 1.2268813848495483, "learning_rate": 9.830790568654646e-06, "loss": 0.1816, "step": 254 }, { "epoch": 0.17665396605472808, "grad_norm": 1.364072561264038, "learning_rate": 9.830097087378641e-06, "loss": 0.2048, "step": 255 }, { "epoch": 0.1773467267059231, "grad_norm": 1.3302061557769775, "learning_rate": 9.829403606102636e-06, "loss": 0.2034, "step": 256 }, { "epoch": 0.1780394873571181, "grad_norm": 1.359153389930725, "learning_rate": 9.82871012482663e-06, "loss": 0.2518, "step": 257 }, { "epoch": 0.17873224800831314, "grad_norm": 1.1203558444976807, "learning_rate": 9.828016643550626e-06, "loss": 0.1812, "step": 258 }, { "epoch": 0.17942500865950814, "grad_norm": 1.5545564889907837, "learning_rate": 9.827323162274619e-06, "loss": 0.2388, "step": 259 }, { "epoch": 0.18011776931070314, "grad_norm": 1.2660139799118042, "learning_rate": 9.826629680998614e-06, "loss": 0.1896, "step": 260 }, { "epoch": 0.18081052996189817, "grad_norm": 1.1667258739471436, "learning_rate": 9.825936199722609e-06, "loss": 0.1964, "step": 261 }, { "epoch": 0.18150329061309317, "grad_norm": 1.3700950145721436, "learning_rate": 9.825242718446602e-06, "loss": 0.2092, "step": 262 }, { "epoch": 0.1821960512642882, "grad_norm": 1.4845219850540161, "learning_rate": 9.824549237170597e-06, "loss": 0.2104, "step": 263 }, { "epoch": 0.1828888119154832, "grad_norm": 1.2829970121383667, "learning_rate": 9.823855755894592e-06, "loss": 0.1913, "step": 264 }, { "epoch": 0.1835815725666782, "grad_norm": 1.3625524044036865, "learning_rate": 9.823162274618587e-06, "loss": 0.1969, "step": 265 }, { "epoch": 0.18427433321787323, "grad_norm": 1.195306658744812, "learning_rate": 9.822468793342582e-06, "loss": 0.1846, "step": 266 }, { "epoch": 0.18496709386906823, "grad_norm": 1.247125267982483, "learning_rate": 9.821775312066575e-06, "loss": 0.1967, "step": 267 }, { "epoch": 0.18565985452026326, "grad_norm": 1.1861320734024048, "learning_rate": 9.82108183079057e-06, "loss": 0.1864, "step": 268 }, { "epoch": 0.18635261517145826, "grad_norm": 1.2261962890625, "learning_rate": 9.820388349514563e-06, "loss": 0.2117, "step": 269 }, { "epoch": 0.18704537582265326, "grad_norm": 1.260686993598938, "learning_rate": 9.819694868238558e-06, "loss": 0.2126, "step": 270 }, { "epoch": 0.1877381364738483, "grad_norm": 1.3203638792037964, "learning_rate": 9.819001386962553e-06, "loss": 0.2191, "step": 271 }, { "epoch": 0.1884308971250433, "grad_norm": 1.3990230560302734, "learning_rate": 9.818307905686546e-06, "loss": 0.195, "step": 272 }, { "epoch": 0.1891236577762383, "grad_norm": 1.2345364093780518, "learning_rate": 9.817614424410541e-06, "loss": 0.2135, "step": 273 }, { "epoch": 0.18981641842743333, "grad_norm": 1.2527592182159424, "learning_rate": 9.816920943134536e-06, "loss": 0.2312, "step": 274 }, { "epoch": 0.19050917907862833, "grad_norm": 1.3236708641052246, "learning_rate": 9.816227461858531e-06, "loss": 0.226, "step": 275 }, { "epoch": 0.19120193972982336, "grad_norm": 1.2532308101654053, "learning_rate": 9.815533980582526e-06, "loss": 0.2258, "step": 276 }, { "epoch": 0.19189470038101836, "grad_norm": 1.1519466638565063, "learning_rate": 9.81484049930652e-06, "loss": 0.1831, "step": 277 }, { "epoch": 0.19258746103221336, "grad_norm": 1.0873594284057617, "learning_rate": 9.814147018030514e-06, "loss": 0.1596, "step": 278 }, { "epoch": 0.1932802216834084, "grad_norm": 1.0936123132705688, "learning_rate": 9.813453536754507e-06, "loss": 0.1909, "step": 279 }, { "epoch": 0.1939729823346034, "grad_norm": 1.2175698280334473, "learning_rate": 9.812760055478502e-06, "loss": 0.1901, "step": 280 }, { "epoch": 0.19466574298579842, "grad_norm": 1.2072296142578125, "learning_rate": 9.812066574202497e-06, "loss": 0.2185, "step": 281 }, { "epoch": 0.19535850363699342, "grad_norm": 1.2681595087051392, "learning_rate": 9.811373092926492e-06, "loss": 0.1889, "step": 282 }, { "epoch": 0.19605126428818842, "grad_norm": 1.2664990425109863, "learning_rate": 9.810679611650487e-06, "loss": 0.2113, "step": 283 }, { "epoch": 0.19674402493938345, "grad_norm": 1.1895562410354614, "learning_rate": 9.80998613037448e-06, "loss": 0.1695, "step": 284 }, { "epoch": 0.19743678559057845, "grad_norm": 1.3624688386917114, "learning_rate": 9.809292649098475e-06, "loss": 0.2142, "step": 285 }, { "epoch": 0.19812954624177348, "grad_norm": 1.0687636137008667, "learning_rate": 9.80859916782247e-06, "loss": 0.1879, "step": 286 }, { "epoch": 0.19882230689296848, "grad_norm": 1.2875828742980957, "learning_rate": 9.807905686546464e-06, "loss": 0.1725, "step": 287 }, { "epoch": 0.19951506754416348, "grad_norm": 1.3224164247512817, "learning_rate": 9.807212205270458e-06, "loss": 0.2365, "step": 288 }, { "epoch": 0.2002078281953585, "grad_norm": 1.1853020191192627, "learning_rate": 9.806518723994453e-06, "loss": 0.178, "step": 289 }, { "epoch": 0.2009005888465535, "grad_norm": 1.1800174713134766, "learning_rate": 9.805825242718447e-06, "loss": 0.2122, "step": 290 }, { "epoch": 0.20159334949774851, "grad_norm": 1.3669440746307373, "learning_rate": 9.805131761442442e-06, "loss": 0.1868, "step": 291 }, { "epoch": 0.20228611014894354, "grad_norm": 1.2624220848083496, "learning_rate": 9.804438280166437e-06, "loss": 0.1918, "step": 292 }, { "epoch": 0.20297887080013854, "grad_norm": 1.2374008893966675, "learning_rate": 9.803744798890431e-06, "loss": 0.2083, "step": 293 }, { "epoch": 0.20367163145133357, "grad_norm": 1.2160348892211914, "learning_rate": 9.803051317614425e-06, "loss": 0.2094, "step": 294 }, { "epoch": 0.20436439210252857, "grad_norm": 1.2681633234024048, "learning_rate": 9.80235783633842e-06, "loss": 0.1929, "step": 295 }, { "epoch": 0.20505715275372358, "grad_norm": 1.4707847833633423, "learning_rate": 9.801664355062415e-06, "loss": 0.2017, "step": 296 }, { "epoch": 0.2057499134049186, "grad_norm": 1.315453290939331, "learning_rate": 9.800970873786408e-06, "loss": 0.2484, "step": 297 }, { "epoch": 0.2064426740561136, "grad_norm": 1.1209535598754883, "learning_rate": 9.800277392510403e-06, "loss": 0.2102, "step": 298 }, { "epoch": 0.20713543470730864, "grad_norm": 1.1134284734725952, "learning_rate": 9.799583911234398e-06, "loss": 0.1657, "step": 299 }, { "epoch": 0.20782819535850364, "grad_norm": 1.4142677783966064, "learning_rate": 9.798890429958393e-06, "loss": 0.278, "step": 300 }, { "epoch": 0.20852095600969864, "grad_norm": 1.3863179683685303, "learning_rate": 9.798196948682388e-06, "loss": 0.199, "step": 301 }, { "epoch": 0.20921371666089367, "grad_norm": 1.3656364679336548, "learning_rate": 9.79750346740638e-06, "loss": 0.2104, "step": 302 }, { "epoch": 0.20990647731208867, "grad_norm": 1.2141101360321045, "learning_rate": 9.796809986130376e-06, "loss": 0.195, "step": 303 }, { "epoch": 0.2105992379632837, "grad_norm": 0.9968119859695435, "learning_rate": 9.796116504854369e-06, "loss": 0.1642, "step": 304 }, { "epoch": 0.2112919986144787, "grad_norm": 1.1695276498794556, "learning_rate": 9.795423023578364e-06, "loss": 0.1836, "step": 305 }, { "epoch": 0.2119847592656737, "grad_norm": 1.1669930219650269, "learning_rate": 9.794729542302359e-06, "loss": 0.18, "step": 306 }, { "epoch": 0.21267751991686873, "grad_norm": 1.2998963594436646, "learning_rate": 9.794036061026354e-06, "loss": 0.1898, "step": 307 }, { "epoch": 0.21337028056806373, "grad_norm": 1.1873587369918823, "learning_rate": 9.793342579750349e-06, "loss": 0.1659, "step": 308 }, { "epoch": 0.21406304121925876, "grad_norm": 1.2754130363464355, "learning_rate": 9.792649098474342e-06, "loss": 0.244, "step": 309 }, { "epoch": 0.21475580187045376, "grad_norm": 1.1994528770446777, "learning_rate": 9.791955617198337e-06, "loss": 0.1897, "step": 310 }, { "epoch": 0.21544856252164876, "grad_norm": 1.1074354648590088, "learning_rate": 9.791262135922332e-06, "loss": 0.2014, "step": 311 }, { "epoch": 0.2161413231728438, "grad_norm": 1.0358245372772217, "learning_rate": 9.790568654646325e-06, "loss": 0.1665, "step": 312 }, { "epoch": 0.2168340838240388, "grad_norm": 1.2764501571655273, "learning_rate": 9.78987517337032e-06, "loss": 0.2242, "step": 313 }, { "epoch": 0.2175268444752338, "grad_norm": 1.310734748840332, "learning_rate": 9.789181692094313e-06, "loss": 0.2104, "step": 314 }, { "epoch": 0.21821960512642882, "grad_norm": 1.4064656496047974, "learning_rate": 9.788488210818308e-06, "loss": 0.2077, "step": 315 }, { "epoch": 0.21891236577762382, "grad_norm": 1.2232162952423096, "learning_rate": 9.787794729542303e-06, "loss": 0.214, "step": 316 }, { "epoch": 0.21960512642881885, "grad_norm": 1.4126719236373901, "learning_rate": 9.787101248266298e-06, "loss": 0.2201, "step": 317 }, { "epoch": 0.22029788708001385, "grad_norm": 1.2731772661209106, "learning_rate": 9.786407766990293e-06, "loss": 0.2398, "step": 318 }, { "epoch": 0.22099064773120886, "grad_norm": 1.2599573135375977, "learning_rate": 9.785714285714286e-06, "loss": 0.1858, "step": 319 }, { "epoch": 0.22168340838240388, "grad_norm": 1.168334722518921, "learning_rate": 9.785020804438281e-06, "loss": 0.1803, "step": 320 }, { "epoch": 0.22237616903359889, "grad_norm": 1.2936931848526, "learning_rate": 9.784327323162276e-06, "loss": 0.1773, "step": 321 }, { "epoch": 0.22306892968479392, "grad_norm": 1.203903317451477, "learning_rate": 9.78363384188627e-06, "loss": 0.19, "step": 322 }, { "epoch": 0.22376169033598892, "grad_norm": 1.1261348724365234, "learning_rate": 9.782940360610264e-06, "loss": 0.2082, "step": 323 }, { "epoch": 0.22445445098718392, "grad_norm": 1.328880786895752, "learning_rate": 9.782246879334259e-06, "loss": 0.1634, "step": 324 }, { "epoch": 0.22514721163837895, "grad_norm": 1.2127742767333984, "learning_rate": 9.781553398058254e-06, "loss": 0.1974, "step": 325 }, { "epoch": 0.22583997228957395, "grad_norm": 1.1583975553512573, "learning_rate": 9.780859916782249e-06, "loss": 0.1962, "step": 326 }, { "epoch": 0.22653273294076898, "grad_norm": 1.3292402029037476, "learning_rate": 9.780166435506242e-06, "loss": 0.2285, "step": 327 }, { "epoch": 0.22722549359196398, "grad_norm": 1.1186047792434692, "learning_rate": 9.779472954230237e-06, "loss": 0.1795, "step": 328 }, { "epoch": 0.22791825424315898, "grad_norm": 1.1951663494110107, "learning_rate": 9.77877947295423e-06, "loss": 0.1856, "step": 329 }, { "epoch": 0.228611014894354, "grad_norm": 1.2000247240066528, "learning_rate": 9.778085991678225e-06, "loss": 0.2217, "step": 330 }, { "epoch": 0.229303775545549, "grad_norm": 1.190314531326294, "learning_rate": 9.77739251040222e-06, "loss": 0.1898, "step": 331 }, { "epoch": 0.229996536196744, "grad_norm": 1.2973747253417969, "learning_rate": 9.776699029126214e-06, "loss": 0.2458, "step": 332 }, { "epoch": 0.23068929684793904, "grad_norm": 1.2944179773330688, "learning_rate": 9.776005547850208e-06, "loss": 0.2192, "step": 333 }, { "epoch": 0.23138205749913404, "grad_norm": 1.2244884967803955, "learning_rate": 9.775312066574203e-06, "loss": 0.195, "step": 334 }, { "epoch": 0.23207481815032907, "grad_norm": 1.1542786359786987, "learning_rate": 9.774618585298198e-06, "loss": 0.2109, "step": 335 }, { "epoch": 0.23276757880152407, "grad_norm": 1.220381498336792, "learning_rate": 9.773925104022193e-06, "loss": 0.1793, "step": 336 }, { "epoch": 0.23346033945271907, "grad_norm": 1.3393840789794922, "learning_rate": 9.773231622746186e-06, "loss": 0.2216, "step": 337 }, { "epoch": 0.2341531001039141, "grad_norm": 1.2374804019927979, "learning_rate": 9.772538141470181e-06, "loss": 0.1691, "step": 338 }, { "epoch": 0.2348458607551091, "grad_norm": 1.1657991409301758, "learning_rate": 9.771844660194175e-06, "loss": 0.1849, "step": 339 }, { "epoch": 0.23553862140630413, "grad_norm": 1.2341691255569458, "learning_rate": 9.77115117891817e-06, "loss": 0.22, "step": 340 }, { "epoch": 0.23623138205749913, "grad_norm": 1.1729243993759155, "learning_rate": 9.770457697642165e-06, "loss": 0.1999, "step": 341 }, { "epoch": 0.23692414270869414, "grad_norm": 1.2024861574172974, "learning_rate": 9.76976421636616e-06, "loss": 0.2065, "step": 342 }, { "epoch": 0.23761690335988916, "grad_norm": 1.1806306838989258, "learning_rate": 9.769070735090154e-06, "loss": 0.2052, "step": 343 }, { "epoch": 0.23830966401108417, "grad_norm": 1.2302980422973633, "learning_rate": 9.768377253814148e-06, "loss": 0.1886, "step": 344 }, { "epoch": 0.2390024246622792, "grad_norm": 1.370265007019043, "learning_rate": 9.767683772538143e-06, "loss": 0.2156, "step": 345 }, { "epoch": 0.2396951853134742, "grad_norm": 1.2476282119750977, "learning_rate": 9.766990291262138e-06, "loss": 0.2018, "step": 346 }, { "epoch": 0.2403879459646692, "grad_norm": 1.2265602350234985, "learning_rate": 9.76629680998613e-06, "loss": 0.2077, "step": 347 }, { "epoch": 0.24108070661586423, "grad_norm": 1.2666821479797363, "learning_rate": 9.765603328710126e-06, "loss": 0.2045, "step": 348 }, { "epoch": 0.24177346726705923, "grad_norm": 1.2393244504928589, "learning_rate": 9.764909847434119e-06, "loss": 0.2169, "step": 349 }, { "epoch": 0.24246622791825426, "grad_norm": 1.1078341007232666, "learning_rate": 9.764216366158114e-06, "loss": 0.1778, "step": 350 }, { "epoch": 0.24315898856944926, "grad_norm": 1.1971571445465088, "learning_rate": 9.763522884882109e-06, "loss": 0.2142, "step": 351 }, { "epoch": 0.24385174922064426, "grad_norm": 1.0961097478866577, "learning_rate": 9.762829403606104e-06, "loss": 0.1795, "step": 352 }, { "epoch": 0.2445445098718393, "grad_norm": 1.141653299331665, "learning_rate": 9.762135922330099e-06, "loss": 0.1912, "step": 353 }, { "epoch": 0.2452372705230343, "grad_norm": 1.2854044437408447, "learning_rate": 9.761442441054092e-06, "loss": 0.215, "step": 354 }, { "epoch": 0.2459300311742293, "grad_norm": 1.32241690158844, "learning_rate": 9.760748959778087e-06, "loss": 0.2358, "step": 355 }, { "epoch": 0.24662279182542432, "grad_norm": 1.2686291933059692, "learning_rate": 9.760055478502082e-06, "loss": 0.2248, "step": 356 }, { "epoch": 0.24731555247661932, "grad_norm": 1.0646724700927734, "learning_rate": 9.759361997226075e-06, "loss": 0.1383, "step": 357 }, { "epoch": 0.24800831312781435, "grad_norm": 1.18061363697052, "learning_rate": 9.75866851595007e-06, "loss": 0.2103, "step": 358 }, { "epoch": 0.24870107377900935, "grad_norm": 1.1441906690597534, "learning_rate": 9.757975034674065e-06, "loss": 0.1733, "step": 359 }, { "epoch": 0.24939383443020435, "grad_norm": 1.244655966758728, "learning_rate": 9.75728155339806e-06, "loss": 0.2094, "step": 360 }, { "epoch": 0.25008659508139935, "grad_norm": 1.1576495170593262, "learning_rate": 9.756588072122055e-06, "loss": 0.1762, "step": 361 }, { "epoch": 0.2507793557325944, "grad_norm": 1.246250867843628, "learning_rate": 9.755894590846048e-06, "loss": 0.1998, "step": 362 }, { "epoch": 0.2514721163837894, "grad_norm": 1.1842297315597534, "learning_rate": 9.755201109570043e-06, "loss": 0.1827, "step": 363 }, { "epoch": 0.2521648770349844, "grad_norm": 1.2482850551605225, "learning_rate": 9.754507628294036e-06, "loss": 0.1867, "step": 364 }, { "epoch": 0.2528576376861794, "grad_norm": 1.3382683992385864, "learning_rate": 9.753814147018031e-06, "loss": 0.2192, "step": 365 }, { "epoch": 0.2535503983373744, "grad_norm": 1.0633735656738281, "learning_rate": 9.753120665742026e-06, "loss": 0.163, "step": 366 }, { "epoch": 0.2542431589885695, "grad_norm": 1.0362846851348877, "learning_rate": 9.752427184466021e-06, "loss": 0.172, "step": 367 }, { "epoch": 0.2549359196397645, "grad_norm": 1.1413606405258179, "learning_rate": 9.751733703190014e-06, "loss": 0.2118, "step": 368 }, { "epoch": 0.2556286802909595, "grad_norm": 1.2740391492843628, "learning_rate": 9.751040221914009e-06, "loss": 0.2133, "step": 369 }, { "epoch": 0.2563214409421545, "grad_norm": 1.2702744007110596, "learning_rate": 9.750346740638004e-06, "loss": 0.2098, "step": 370 }, { "epoch": 0.2570142015933495, "grad_norm": 1.3101216554641724, "learning_rate": 9.749653259361997e-06, "loss": 0.2287, "step": 371 }, { "epoch": 0.25770696224454454, "grad_norm": 1.1721727848052979, "learning_rate": 9.748959778085992e-06, "loss": 0.1813, "step": 372 }, { "epoch": 0.25839972289573954, "grad_norm": 1.2729319334030151, "learning_rate": 9.748266296809987e-06, "loss": 0.1981, "step": 373 }, { "epoch": 0.25909248354693454, "grad_norm": 1.223813533782959, "learning_rate": 9.74757281553398e-06, "loss": 0.1912, "step": 374 }, { "epoch": 0.25978524419812954, "grad_norm": 1.198010802268982, "learning_rate": 9.746879334257975e-06, "loss": 0.1891, "step": 375 }, { "epoch": 0.26047800484932454, "grad_norm": 1.2105743885040283, "learning_rate": 9.74618585298197e-06, "loss": 0.2123, "step": 376 }, { "epoch": 0.2611707655005196, "grad_norm": 1.2053524255752563, "learning_rate": 9.745492371705965e-06, "loss": 0.2044, "step": 377 }, { "epoch": 0.2618635261517146, "grad_norm": 1.3305010795593262, "learning_rate": 9.74479889042996e-06, "loss": 0.203, "step": 378 }, { "epoch": 0.2625562868029096, "grad_norm": 1.1689729690551758, "learning_rate": 9.744105409153953e-06, "loss": 0.2056, "step": 379 }, { "epoch": 0.2632490474541046, "grad_norm": 1.2126179933547974, "learning_rate": 9.743411927877948e-06, "loss": 0.2246, "step": 380 }, { "epoch": 0.2639418081052996, "grad_norm": 1.2313309907913208, "learning_rate": 9.742718446601942e-06, "loss": 0.1887, "step": 381 }, { "epoch": 0.2646345687564946, "grad_norm": 1.238054633140564, "learning_rate": 9.742024965325936e-06, "loss": 0.2449, "step": 382 }, { "epoch": 0.26532732940768966, "grad_norm": 1.1343685388565063, "learning_rate": 9.741331484049931e-06, "loss": 0.1905, "step": 383 }, { "epoch": 0.26602009005888466, "grad_norm": 1.0633238554000854, "learning_rate": 9.740638002773926e-06, "loss": 0.1662, "step": 384 }, { "epoch": 0.26671285071007966, "grad_norm": 1.267633318901062, "learning_rate": 9.739944521497921e-06, "loss": 0.1999, "step": 385 }, { "epoch": 0.26740561136127466, "grad_norm": 1.1595088243484497, "learning_rate": 9.739251040221915e-06, "loss": 0.2027, "step": 386 }, { "epoch": 0.26809837201246967, "grad_norm": 1.138271689414978, "learning_rate": 9.73855755894591e-06, "loss": 0.1548, "step": 387 }, { "epoch": 0.2687911326636647, "grad_norm": 1.0888426303863525, "learning_rate": 9.737864077669904e-06, "loss": 0.2006, "step": 388 }, { "epoch": 0.2694838933148597, "grad_norm": 1.2251331806182861, "learning_rate": 9.737170596393898e-06, "loss": 0.2073, "step": 389 }, { "epoch": 0.2701766539660547, "grad_norm": 1.1277856826782227, "learning_rate": 9.736477115117893e-06, "loss": 0.1727, "step": 390 }, { "epoch": 0.2708694146172497, "grad_norm": 1.1262153387069702, "learning_rate": 9.735783633841886e-06, "loss": 0.2016, "step": 391 }, { "epoch": 0.2715621752684447, "grad_norm": 1.321863055229187, "learning_rate": 9.73509015256588e-06, "loss": 0.2055, "step": 392 }, { "epoch": 0.2722549359196398, "grad_norm": 1.271531343460083, "learning_rate": 9.734396671289876e-06, "loss": 0.2554, "step": 393 }, { "epoch": 0.2729476965708348, "grad_norm": 1.216313123703003, "learning_rate": 9.73370319001387e-06, "loss": 0.2341, "step": 394 }, { "epoch": 0.2736404572220298, "grad_norm": 1.1416480541229248, "learning_rate": 9.733009708737866e-06, "loss": 0.1699, "step": 395 }, { "epoch": 0.2743332178732248, "grad_norm": 1.0104820728302002, "learning_rate": 9.732316227461859e-06, "loss": 0.1712, "step": 396 }, { "epoch": 0.2750259785244198, "grad_norm": 1.174569845199585, "learning_rate": 9.731622746185854e-06, "loss": 0.1882, "step": 397 }, { "epoch": 0.27571873917561485, "grad_norm": 1.202559471130371, "learning_rate": 9.730929264909849e-06, "loss": 0.2103, "step": 398 }, { "epoch": 0.27641149982680985, "grad_norm": 1.1985924243927002, "learning_rate": 9.730235783633842e-06, "loss": 0.1878, "step": 399 }, { "epoch": 0.27710426047800485, "grad_norm": 1.156169056892395, "learning_rate": 9.729542302357837e-06, "loss": 0.2074, "step": 400 }, { "epoch": 0.27779702112919985, "grad_norm": 1.2795816659927368, "learning_rate": 9.728848821081832e-06, "loss": 0.2074, "step": 401 }, { "epoch": 0.27848978178039485, "grad_norm": 1.1505229473114014, "learning_rate": 9.728155339805827e-06, "loss": 0.192, "step": 402 }, { "epoch": 0.2791825424315899, "grad_norm": 1.221264362335205, "learning_rate": 9.727461858529822e-06, "loss": 0.1976, "step": 403 }, { "epoch": 0.2798753030827849, "grad_norm": 1.379112720489502, "learning_rate": 9.726768377253815e-06, "loss": 0.1998, "step": 404 }, { "epoch": 0.2805680637339799, "grad_norm": 1.002105951309204, "learning_rate": 9.72607489597781e-06, "loss": 0.1537, "step": 405 }, { "epoch": 0.2812608243851749, "grad_norm": 1.2976447343826294, "learning_rate": 9.725381414701803e-06, "loss": 0.2045, "step": 406 }, { "epoch": 0.2819535850363699, "grad_norm": 1.293208360671997, "learning_rate": 9.724687933425798e-06, "loss": 0.2234, "step": 407 }, { "epoch": 0.28264634568756497, "grad_norm": 1.0355128049850464, "learning_rate": 9.723994452149793e-06, "loss": 0.1715, "step": 408 }, { "epoch": 0.28333910633875997, "grad_norm": 1.1013727188110352, "learning_rate": 9.723300970873786e-06, "loss": 0.156, "step": 409 }, { "epoch": 0.284031866989955, "grad_norm": 1.2460412979125977, "learning_rate": 9.722607489597781e-06, "loss": 0.226, "step": 410 }, { "epoch": 0.28472462764115, "grad_norm": 1.16079843044281, "learning_rate": 9.721914008321776e-06, "loss": 0.1926, "step": 411 }, { "epoch": 0.285417388292345, "grad_norm": 1.0862517356872559, "learning_rate": 9.721220527045771e-06, "loss": 0.1744, "step": 412 }, { "epoch": 0.28611014894354003, "grad_norm": 1.147106409072876, "learning_rate": 9.720527045769766e-06, "loss": 0.1853, "step": 413 }, { "epoch": 0.28680290959473503, "grad_norm": 1.2101041078567505, "learning_rate": 9.719833564493759e-06, "loss": 0.2021, "step": 414 }, { "epoch": 0.28749567024593004, "grad_norm": 1.2365410327911377, "learning_rate": 9.719140083217754e-06, "loss": 0.2283, "step": 415 }, { "epoch": 0.28818843089712504, "grad_norm": 1.1555569171905518, "learning_rate": 9.718446601941747e-06, "loss": 0.2091, "step": 416 }, { "epoch": 0.28888119154832004, "grad_norm": 1.1707910299301147, "learning_rate": 9.717753120665742e-06, "loss": 0.1849, "step": 417 }, { "epoch": 0.2895739521995151, "grad_norm": 1.1495592594146729, "learning_rate": 9.717059639389737e-06, "loss": 0.185, "step": 418 }, { "epoch": 0.2902667128507101, "grad_norm": 1.1243607997894287, "learning_rate": 9.716366158113732e-06, "loss": 0.1699, "step": 419 }, { "epoch": 0.2909594735019051, "grad_norm": 1.0783498287200928, "learning_rate": 9.715672676837727e-06, "loss": 0.1778, "step": 420 }, { "epoch": 0.2916522341531001, "grad_norm": 1.098490834236145, "learning_rate": 9.71497919556172e-06, "loss": 0.185, "step": 421 }, { "epoch": 0.2923449948042951, "grad_norm": 1.2742648124694824, "learning_rate": 9.714285714285715e-06, "loss": 0.193, "step": 422 }, { "epoch": 0.2930377554554901, "grad_norm": 1.2350236177444458, "learning_rate": 9.71359223300971e-06, "loss": 0.2115, "step": 423 }, { "epoch": 0.29373051610668516, "grad_norm": 1.175158977508545, "learning_rate": 9.712898751733703e-06, "loss": 0.2067, "step": 424 }, { "epoch": 0.29442327675788016, "grad_norm": 1.1748855113983154, "learning_rate": 9.712205270457698e-06, "loss": 0.1961, "step": 425 }, { "epoch": 0.29511603740907516, "grad_norm": 1.1294324398040771, "learning_rate": 9.711511789181692e-06, "loss": 0.2103, "step": 426 }, { "epoch": 0.29580879806027016, "grad_norm": 1.0847774744033813, "learning_rate": 9.710818307905686e-06, "loss": 0.1919, "step": 427 }, { "epoch": 0.29650155871146516, "grad_norm": 1.0319700241088867, "learning_rate": 9.710124826629681e-06, "loss": 0.1564, "step": 428 }, { "epoch": 0.2971943193626602, "grad_norm": 1.1214897632598877, "learning_rate": 9.709431345353676e-06, "loss": 0.187, "step": 429 }, { "epoch": 0.2978870800138552, "grad_norm": 1.1951547861099243, "learning_rate": 9.708737864077671e-06, "loss": 0.1894, "step": 430 }, { "epoch": 0.2985798406650502, "grad_norm": 1.082057237625122, "learning_rate": 9.708044382801664e-06, "loss": 0.1784, "step": 431 }, { "epoch": 0.2992726013162452, "grad_norm": 1.1025521755218506, "learning_rate": 9.70735090152566e-06, "loss": 0.1705, "step": 432 }, { "epoch": 0.2999653619674402, "grad_norm": 1.2060528993606567, "learning_rate": 9.706657420249654e-06, "loss": 0.2305, "step": 433 }, { "epoch": 0.3006581226186353, "grad_norm": 1.2095818519592285, "learning_rate": 9.705963938973648e-06, "loss": 0.1757, "step": 434 }, { "epoch": 0.3013508832698303, "grad_norm": 1.1277533769607544, "learning_rate": 9.705270457697643e-06, "loss": 0.1559, "step": 435 }, { "epoch": 0.3020436439210253, "grad_norm": 1.0360640287399292, "learning_rate": 9.704576976421637e-06, "loss": 0.169, "step": 436 }, { "epoch": 0.3027364045722203, "grad_norm": 1.2301961183547974, "learning_rate": 9.703883495145632e-06, "loss": 0.198, "step": 437 }, { "epoch": 0.3034291652234153, "grad_norm": 1.2964677810668945, "learning_rate": 9.703190013869627e-06, "loss": 0.2049, "step": 438 }, { "epoch": 0.30412192587461034, "grad_norm": 1.098554015159607, "learning_rate": 9.70249653259362e-06, "loss": 0.1893, "step": 439 }, { "epoch": 0.30481468652580535, "grad_norm": 1.2005589008331299, "learning_rate": 9.701803051317616e-06, "loss": 0.211, "step": 440 }, { "epoch": 0.30550744717700035, "grad_norm": 1.1574318408966064, "learning_rate": 9.701109570041609e-06, "loss": 0.1909, "step": 441 }, { "epoch": 0.30620020782819535, "grad_norm": 1.2841503620147705, "learning_rate": 9.700416088765604e-06, "loss": 0.1951, "step": 442 }, { "epoch": 0.30689296847939035, "grad_norm": 1.0907950401306152, "learning_rate": 9.699722607489599e-06, "loss": 0.1671, "step": 443 }, { "epoch": 0.3075857291305854, "grad_norm": 1.0942476987838745, "learning_rate": 9.699029126213594e-06, "loss": 0.1738, "step": 444 }, { "epoch": 0.3082784897817804, "grad_norm": 1.1258342266082764, "learning_rate": 9.698335644937587e-06, "loss": 0.1602, "step": 445 }, { "epoch": 0.3089712504329754, "grad_norm": 1.1836743354797363, "learning_rate": 9.697642163661582e-06, "loss": 0.1926, "step": 446 }, { "epoch": 0.3096640110841704, "grad_norm": 1.2888097763061523, "learning_rate": 9.696948682385577e-06, "loss": 0.1945, "step": 447 }, { "epoch": 0.3103567717353654, "grad_norm": 1.0280569791793823, "learning_rate": 9.696255201109572e-06, "loss": 0.152, "step": 448 }, { "epoch": 0.31104953238656047, "grad_norm": 1.1025164127349854, "learning_rate": 9.695561719833565e-06, "loss": 0.2013, "step": 449 }, { "epoch": 0.31174229303775547, "grad_norm": 1.1613831520080566, "learning_rate": 9.69486823855756e-06, "loss": 0.1909, "step": 450 }, { "epoch": 0.31243505368895047, "grad_norm": 1.1448906660079956, "learning_rate": 9.694174757281553e-06, "loss": 0.209, "step": 451 }, { "epoch": 0.31312781434014547, "grad_norm": 1.0306289196014404, "learning_rate": 9.693481276005548e-06, "loss": 0.1618, "step": 452 }, { "epoch": 0.3138205749913405, "grad_norm": 1.0736443996429443, "learning_rate": 9.692787794729543e-06, "loss": 0.1656, "step": 453 }, { "epoch": 0.31451333564253553, "grad_norm": 1.131273627281189, "learning_rate": 9.692094313453538e-06, "loss": 0.1749, "step": 454 }, { "epoch": 0.31520609629373053, "grad_norm": 1.082929253578186, "learning_rate": 9.691400832177533e-06, "loss": 0.1634, "step": 455 }, { "epoch": 0.31589885694492553, "grad_norm": 1.1273523569107056, "learning_rate": 9.690707350901526e-06, "loss": 0.1873, "step": 456 }, { "epoch": 0.31659161759612053, "grad_norm": 1.0584896802902222, "learning_rate": 9.690013869625521e-06, "loss": 0.1625, "step": 457 }, { "epoch": 0.31728437824731553, "grad_norm": 1.2214630842208862, "learning_rate": 9.689320388349516e-06, "loss": 0.1886, "step": 458 }, { "epoch": 0.3179771388985106, "grad_norm": 1.0907474756240845, "learning_rate": 9.688626907073509e-06, "loss": 0.1835, "step": 459 }, { "epoch": 0.3186698995497056, "grad_norm": 1.190325379371643, "learning_rate": 9.687933425797504e-06, "loss": 0.1808, "step": 460 }, { "epoch": 0.3193626602009006, "grad_norm": 1.0343101024627686, "learning_rate": 9.687239944521499e-06, "loss": 0.1647, "step": 461 }, { "epoch": 0.3200554208520956, "grad_norm": 1.1271415948867798, "learning_rate": 9.686546463245494e-06, "loss": 0.1749, "step": 462 }, { "epoch": 0.3207481815032906, "grad_norm": 1.165846347808838, "learning_rate": 9.685852981969489e-06, "loss": 0.1609, "step": 463 }, { "epoch": 0.3214409421544856, "grad_norm": 1.1843103170394897, "learning_rate": 9.685159500693482e-06, "loss": 0.1854, "step": 464 }, { "epoch": 0.32213370280568066, "grad_norm": 1.0988398790359497, "learning_rate": 9.684466019417477e-06, "loss": 0.1786, "step": 465 }, { "epoch": 0.32282646345687566, "grad_norm": 1.2949365377426147, "learning_rate": 9.68377253814147e-06, "loss": 0.1947, "step": 466 }, { "epoch": 0.32351922410807066, "grad_norm": 1.3218495845794678, "learning_rate": 9.683079056865465e-06, "loss": 0.2363, "step": 467 }, { "epoch": 0.32421198475926566, "grad_norm": 1.0944557189941406, "learning_rate": 9.68238557558946e-06, "loss": 0.1645, "step": 468 }, { "epoch": 0.32490474541046066, "grad_norm": 1.2187862396240234, "learning_rate": 9.681692094313453e-06, "loss": 0.1986, "step": 469 }, { "epoch": 0.3255975060616557, "grad_norm": 1.1893078088760376, "learning_rate": 9.680998613037448e-06, "loss": 0.1879, "step": 470 }, { "epoch": 0.3262902667128507, "grad_norm": 1.2025196552276611, "learning_rate": 9.680305131761443e-06, "loss": 0.1843, "step": 471 }, { "epoch": 0.3269830273640457, "grad_norm": 1.064837098121643, "learning_rate": 9.679611650485438e-06, "loss": 0.161, "step": 472 }, { "epoch": 0.3276757880152407, "grad_norm": 1.1834200620651245, "learning_rate": 9.678918169209433e-06, "loss": 0.2052, "step": 473 }, { "epoch": 0.3283685486664357, "grad_norm": 1.117592215538025, "learning_rate": 9.678224687933426e-06, "loss": 0.1722, "step": 474 }, { "epoch": 0.3290613093176308, "grad_norm": 1.1554933786392212, "learning_rate": 9.677531206657421e-06, "loss": 0.1696, "step": 475 }, { "epoch": 0.3297540699688258, "grad_norm": 1.256576418876648, "learning_rate": 9.676837725381414e-06, "loss": 0.2044, "step": 476 }, { "epoch": 0.3304468306200208, "grad_norm": 1.1218692064285278, "learning_rate": 9.67614424410541e-06, "loss": 0.1754, "step": 477 }, { "epoch": 0.3311395912712158, "grad_norm": 1.0906285047531128, "learning_rate": 9.675450762829404e-06, "loss": 0.1782, "step": 478 }, { "epoch": 0.3318323519224108, "grad_norm": 1.1338284015655518, "learning_rate": 9.6747572815534e-06, "loss": 0.1699, "step": 479 }, { "epoch": 0.33252511257360584, "grad_norm": 1.0718350410461426, "learning_rate": 9.674063800277394e-06, "loss": 0.1797, "step": 480 }, { "epoch": 0.33321787322480084, "grad_norm": 1.0006433725357056, "learning_rate": 9.673370319001387e-06, "loss": 0.1455, "step": 481 }, { "epoch": 0.33391063387599584, "grad_norm": 0.9580618143081665, "learning_rate": 9.672676837725382e-06, "loss": 0.1499, "step": 482 }, { "epoch": 0.33460339452719084, "grad_norm": 1.1401749849319458, "learning_rate": 9.671983356449377e-06, "loss": 0.1873, "step": 483 }, { "epoch": 0.33529615517838585, "grad_norm": 1.089127779006958, "learning_rate": 9.67128987517337e-06, "loss": 0.165, "step": 484 }, { "epoch": 0.3359889158295809, "grad_norm": 1.109618902206421, "learning_rate": 9.670596393897365e-06, "loss": 0.1922, "step": 485 }, { "epoch": 0.3366816764807759, "grad_norm": 1.332227349281311, "learning_rate": 9.669902912621359e-06, "loss": 0.2299, "step": 486 }, { "epoch": 0.3373744371319709, "grad_norm": 1.1735680103302002, "learning_rate": 9.669209431345354e-06, "loss": 0.1865, "step": 487 }, { "epoch": 0.3380671977831659, "grad_norm": 1.0729219913482666, "learning_rate": 9.668515950069349e-06, "loss": 0.1925, "step": 488 }, { "epoch": 0.3387599584343609, "grad_norm": 1.0912272930145264, "learning_rate": 9.667822468793344e-06, "loss": 0.1672, "step": 489 }, { "epoch": 0.33945271908555597, "grad_norm": 1.1081303358078003, "learning_rate": 9.667128987517338e-06, "loss": 0.1879, "step": 490 }, { "epoch": 0.34014547973675097, "grad_norm": 1.1520613431930542, "learning_rate": 9.666435506241332e-06, "loss": 0.1807, "step": 491 }, { "epoch": 0.34083824038794597, "grad_norm": 1.1199016571044922, "learning_rate": 9.665742024965327e-06, "loss": 0.1783, "step": 492 }, { "epoch": 0.34153100103914097, "grad_norm": 1.2753344774246216, "learning_rate": 9.665048543689322e-06, "loss": 0.2021, "step": 493 }, { "epoch": 0.34222376169033597, "grad_norm": 1.1093775033950806, "learning_rate": 9.664355062413315e-06, "loss": 0.1505, "step": 494 }, { "epoch": 0.342916522341531, "grad_norm": 1.1510506868362427, "learning_rate": 9.66366158113731e-06, "loss": 0.1739, "step": 495 }, { "epoch": 0.34360928299272603, "grad_norm": 1.119981050491333, "learning_rate": 9.662968099861305e-06, "loss": 0.1804, "step": 496 }, { "epoch": 0.34430204364392103, "grad_norm": 1.1446335315704346, "learning_rate": 9.6622746185853e-06, "loss": 0.1772, "step": 497 }, { "epoch": 0.34499480429511603, "grad_norm": 1.0427706241607666, "learning_rate": 9.661581137309295e-06, "loss": 0.1481, "step": 498 }, { "epoch": 0.34568756494631103, "grad_norm": 1.0160114765167236, "learning_rate": 9.660887656033288e-06, "loss": 0.1445, "step": 499 }, { "epoch": 0.3463803255975061, "grad_norm": 1.117127537727356, "learning_rate": 9.660194174757283e-06, "loss": 0.1576, "step": 500 }, { "epoch": 0.3470730862487011, "grad_norm": 1.1823601722717285, "learning_rate": 9.659500693481276e-06, "loss": 0.2048, "step": 501 }, { "epoch": 0.3477658468998961, "grad_norm": 1.1818424463272095, "learning_rate": 9.658807212205271e-06, "loss": 0.2025, "step": 502 }, { "epoch": 0.3484586075510911, "grad_norm": 1.2560657262802124, "learning_rate": 9.658113730929266e-06, "loss": 0.1853, "step": 503 }, { "epoch": 0.3491513682022861, "grad_norm": 1.2229050397872925, "learning_rate": 9.657420249653259e-06, "loss": 0.2385, "step": 504 }, { "epoch": 0.3498441288534811, "grad_norm": 1.2232409715652466, "learning_rate": 9.656726768377254e-06, "loss": 0.212, "step": 505 }, { "epoch": 0.35053688950467615, "grad_norm": 1.0253208875656128, "learning_rate": 9.656033287101249e-06, "loss": 0.1585, "step": 506 }, { "epoch": 0.35122965015587115, "grad_norm": 1.1161078214645386, "learning_rate": 9.655339805825244e-06, "loss": 0.1998, "step": 507 }, { "epoch": 0.35192241080706615, "grad_norm": 1.1084858179092407, "learning_rate": 9.654646324549239e-06, "loss": 0.1627, "step": 508 }, { "epoch": 0.35261517145826116, "grad_norm": 1.2024314403533936, "learning_rate": 9.653952843273232e-06, "loss": 0.1539, "step": 509 }, { "epoch": 0.35330793210945616, "grad_norm": 1.1474061012268066, "learning_rate": 9.653259361997227e-06, "loss": 0.1886, "step": 510 }, { "epoch": 0.3540006927606512, "grad_norm": 1.1132161617279053, "learning_rate": 9.65256588072122e-06, "loss": 0.1882, "step": 511 }, { "epoch": 0.3546934534118462, "grad_norm": 1.0473583936691284, "learning_rate": 9.651872399445215e-06, "loss": 0.1735, "step": 512 }, { "epoch": 0.3553862140630412, "grad_norm": 1.031063199043274, "learning_rate": 9.65117891816921e-06, "loss": 0.1658, "step": 513 }, { "epoch": 0.3560789747142362, "grad_norm": 1.1989202499389648, "learning_rate": 9.650485436893205e-06, "loss": 0.1977, "step": 514 }, { "epoch": 0.3567717353654312, "grad_norm": 1.1673550605773926, "learning_rate": 9.6497919556172e-06, "loss": 0.1803, "step": 515 }, { "epoch": 0.3574644960166263, "grad_norm": 1.2909184694290161, "learning_rate": 9.649098474341193e-06, "loss": 0.1833, "step": 516 }, { "epoch": 0.3581572566678213, "grad_norm": 1.2750986814498901, "learning_rate": 9.648404993065188e-06, "loss": 0.1966, "step": 517 }, { "epoch": 0.3588500173190163, "grad_norm": 1.183593988418579, "learning_rate": 9.647711511789183e-06, "loss": 0.1902, "step": 518 }, { "epoch": 0.3595427779702113, "grad_norm": 1.1403534412384033, "learning_rate": 9.647018030513176e-06, "loss": 0.1636, "step": 519 }, { "epoch": 0.3602355386214063, "grad_norm": 1.127642273902893, "learning_rate": 9.646324549237171e-06, "loss": 0.1958, "step": 520 }, { "epoch": 0.36092829927260134, "grad_norm": 1.1878528594970703, "learning_rate": 9.645631067961166e-06, "loss": 0.1899, "step": 521 }, { "epoch": 0.36162105992379634, "grad_norm": 1.1126781702041626, "learning_rate": 9.64493758668516e-06, "loss": 0.1657, "step": 522 }, { "epoch": 0.36231382057499134, "grad_norm": 1.1267461776733398, "learning_rate": 9.644244105409154e-06, "loss": 0.1706, "step": 523 }, { "epoch": 0.36300658122618634, "grad_norm": 1.2233935594558716, "learning_rate": 9.64355062413315e-06, "loss": 0.1979, "step": 524 }, { "epoch": 0.36369934187738134, "grad_norm": 1.3893072605133057, "learning_rate": 9.642857142857144e-06, "loss": 0.2063, "step": 525 }, { "epoch": 0.3643921025285764, "grad_norm": 0.9728911519050598, "learning_rate": 9.642163661581137e-06, "loss": 0.1617, "step": 526 }, { "epoch": 0.3650848631797714, "grad_norm": 1.113114833831787, "learning_rate": 9.641470180305132e-06, "loss": 0.1787, "step": 527 }, { "epoch": 0.3657776238309664, "grad_norm": 1.1457524299621582, "learning_rate": 9.640776699029127e-06, "loss": 0.1738, "step": 528 }, { "epoch": 0.3664703844821614, "grad_norm": 0.9557360410690308, "learning_rate": 9.64008321775312e-06, "loss": 0.1419, "step": 529 }, { "epoch": 0.3671631451333564, "grad_norm": 1.0903773307800293, "learning_rate": 9.639389736477115e-06, "loss": 0.1851, "step": 530 }, { "epoch": 0.36785590578455146, "grad_norm": 1.0435748100280762, "learning_rate": 9.63869625520111e-06, "loss": 0.1541, "step": 531 }, { "epoch": 0.36854866643574646, "grad_norm": 1.2730120420455933, "learning_rate": 9.638002773925105e-06, "loss": 0.2004, "step": 532 }, { "epoch": 0.36924142708694146, "grad_norm": 1.177163004875183, "learning_rate": 9.6373092926491e-06, "loss": 0.1913, "step": 533 }, { "epoch": 0.36993418773813647, "grad_norm": 1.2473503351211548, "learning_rate": 9.636615811373094e-06, "loss": 0.1925, "step": 534 }, { "epoch": 0.37062694838933147, "grad_norm": 1.0158774852752686, "learning_rate": 9.635922330097088e-06, "loss": 0.1535, "step": 535 }, { "epoch": 0.3713197090405265, "grad_norm": 1.147966742515564, "learning_rate": 9.635228848821082e-06, "loss": 0.1947, "step": 536 }, { "epoch": 0.3720124696917215, "grad_norm": 1.1904044151306152, "learning_rate": 9.634535367545077e-06, "loss": 0.1971, "step": 537 }, { "epoch": 0.3727052303429165, "grad_norm": 1.0254184007644653, "learning_rate": 9.633841886269072e-06, "loss": 0.1567, "step": 538 }, { "epoch": 0.37339799099411153, "grad_norm": 1.207237958908081, "learning_rate": 9.633148404993066e-06, "loss": 0.1931, "step": 539 }, { "epoch": 0.37409075164530653, "grad_norm": 1.2658352851867676, "learning_rate": 9.632454923717061e-06, "loss": 0.171, "step": 540 }, { "epoch": 0.3747835122965016, "grad_norm": 1.3020246028900146, "learning_rate": 9.631761442441055e-06, "loss": 0.1924, "step": 541 }, { "epoch": 0.3754762729476966, "grad_norm": 1.0947773456573486, "learning_rate": 9.63106796116505e-06, "loss": 0.1948, "step": 542 }, { "epoch": 0.3761690335988916, "grad_norm": 1.281325340270996, "learning_rate": 9.630374479889045e-06, "loss": 0.2063, "step": 543 }, { "epoch": 0.3768617942500866, "grad_norm": 1.1554181575775146, "learning_rate": 9.629680998613038e-06, "loss": 0.1859, "step": 544 }, { "epoch": 0.3775545549012816, "grad_norm": 1.1861845254898071, "learning_rate": 9.628987517337033e-06, "loss": 0.184, "step": 545 }, { "epoch": 0.3782473155524766, "grad_norm": 1.0844807624816895, "learning_rate": 9.628294036061026e-06, "loss": 0.1549, "step": 546 }, { "epoch": 0.37894007620367165, "grad_norm": 1.0676765441894531, "learning_rate": 9.627600554785021e-06, "loss": 0.1719, "step": 547 }, { "epoch": 0.37963283685486665, "grad_norm": 1.0964168310165405, "learning_rate": 9.626907073509016e-06, "loss": 0.1632, "step": 548 }, { "epoch": 0.38032559750606165, "grad_norm": 1.1241437196731567, "learning_rate": 9.62621359223301e-06, "loss": 0.1702, "step": 549 }, { "epoch": 0.38101835815725665, "grad_norm": 0.990709662437439, "learning_rate": 9.625520110957006e-06, "loss": 0.1441, "step": 550 }, { "epoch": 0.38171111880845165, "grad_norm": 1.131478190422058, "learning_rate": 9.624826629680999e-06, "loss": 0.1793, "step": 551 }, { "epoch": 0.3824038794596467, "grad_norm": 1.108688473701477, "learning_rate": 9.624133148404994e-06, "loss": 0.1485, "step": 552 }, { "epoch": 0.3830966401108417, "grad_norm": 1.3076444864273071, "learning_rate": 9.623439667128989e-06, "loss": 0.2132, "step": 553 }, { "epoch": 0.3837894007620367, "grad_norm": 1.1900216341018677, "learning_rate": 9.622746185852982e-06, "loss": 0.1925, "step": 554 }, { "epoch": 0.3844821614132317, "grad_norm": 1.1948908567428589, "learning_rate": 9.622052704576977e-06, "loss": 0.1964, "step": 555 }, { "epoch": 0.3851749220644267, "grad_norm": 1.0484943389892578, "learning_rate": 9.621359223300972e-06, "loss": 0.1665, "step": 556 }, { "epoch": 0.3858676827156218, "grad_norm": 1.1652822494506836, "learning_rate": 9.620665742024967e-06, "loss": 0.2059, "step": 557 }, { "epoch": 0.3865604433668168, "grad_norm": 1.1573383808135986, "learning_rate": 9.619972260748962e-06, "loss": 0.1829, "step": 558 }, { "epoch": 0.3872532040180118, "grad_norm": 1.075415015220642, "learning_rate": 9.619278779472955e-06, "loss": 0.1573, "step": 559 }, { "epoch": 0.3879459646692068, "grad_norm": 1.114197850227356, "learning_rate": 9.61858529819695e-06, "loss": 0.183, "step": 560 }, { "epoch": 0.3886387253204018, "grad_norm": 1.2600793838500977, "learning_rate": 9.617891816920943e-06, "loss": 0.1897, "step": 561 }, { "epoch": 0.38933148597159684, "grad_norm": 1.266296625137329, "learning_rate": 9.617198335644938e-06, "loss": 0.1765, "step": 562 }, { "epoch": 0.39002424662279184, "grad_norm": 1.1767463684082031, "learning_rate": 9.616504854368933e-06, "loss": 0.1755, "step": 563 }, { "epoch": 0.39071700727398684, "grad_norm": 1.203078269958496, "learning_rate": 9.615811373092926e-06, "loss": 0.2081, "step": 564 }, { "epoch": 0.39140976792518184, "grad_norm": 1.230280876159668, "learning_rate": 9.615117891816921e-06, "loss": 0.1554, "step": 565 }, { "epoch": 0.39210252857637684, "grad_norm": 1.1045949459075928, "learning_rate": 9.614424410540916e-06, "loss": 0.1666, "step": 566 }, { "epoch": 0.3927952892275719, "grad_norm": 1.3441511392593384, "learning_rate": 9.613730929264911e-06, "loss": 0.1952, "step": 567 }, { "epoch": 0.3934880498787669, "grad_norm": 1.3405990600585938, "learning_rate": 9.613037447988906e-06, "loss": 0.2042, "step": 568 }, { "epoch": 0.3941808105299619, "grad_norm": 1.2307276725769043, "learning_rate": 9.6123439667129e-06, "loss": 0.2099, "step": 569 }, { "epoch": 0.3948735711811569, "grad_norm": 1.0651723146438599, "learning_rate": 9.611650485436894e-06, "loss": 0.1796, "step": 570 }, { "epoch": 0.3955663318323519, "grad_norm": 1.2337777614593506, "learning_rate": 9.610957004160887e-06, "loss": 0.2137, "step": 571 }, { "epoch": 0.39625909248354696, "grad_norm": 1.0993244647979736, "learning_rate": 9.610263522884882e-06, "loss": 0.1714, "step": 572 }, { "epoch": 0.39695185313474196, "grad_norm": 1.1756548881530762, "learning_rate": 9.609570041608877e-06, "loss": 0.1684, "step": 573 }, { "epoch": 0.39764461378593696, "grad_norm": 1.0881178379058838, "learning_rate": 9.608876560332872e-06, "loss": 0.1784, "step": 574 }, { "epoch": 0.39833737443713196, "grad_norm": 1.1008052825927734, "learning_rate": 9.608183079056867e-06, "loss": 0.1593, "step": 575 }, { "epoch": 0.39903013508832696, "grad_norm": 1.2633024454116821, "learning_rate": 9.60748959778086e-06, "loss": 0.1946, "step": 576 }, { "epoch": 0.399722895739522, "grad_norm": 1.450706958770752, "learning_rate": 9.606796116504855e-06, "loss": 0.2175, "step": 577 }, { "epoch": 0.400415656390717, "grad_norm": 1.2185648679733276, "learning_rate": 9.60610263522885e-06, "loss": 0.1849, "step": 578 }, { "epoch": 0.401108417041912, "grad_norm": 1.235177755355835, "learning_rate": 9.605409153952843e-06, "loss": 0.1845, "step": 579 }, { "epoch": 0.401801177693107, "grad_norm": 1.2215522527694702, "learning_rate": 9.604715672676838e-06, "loss": 0.1945, "step": 580 }, { "epoch": 0.402493938344302, "grad_norm": 1.1482781171798706, "learning_rate": 9.604022191400832e-06, "loss": 0.1685, "step": 581 }, { "epoch": 0.40318669899549703, "grad_norm": 1.165074348449707, "learning_rate": 9.603328710124827e-06, "loss": 0.1873, "step": 582 }, { "epoch": 0.4038794596466921, "grad_norm": 1.2293803691864014, "learning_rate": 9.602635228848822e-06, "loss": 0.1879, "step": 583 }, { "epoch": 0.4045722202978871, "grad_norm": 1.126194953918457, "learning_rate": 9.601941747572816e-06, "loss": 0.1751, "step": 584 }, { "epoch": 0.4052649809490821, "grad_norm": 1.2499005794525146, "learning_rate": 9.601248266296811e-06, "loss": 0.1672, "step": 585 }, { "epoch": 0.4059577416002771, "grad_norm": 1.1256407499313354, "learning_rate": 9.600554785020805e-06, "loss": 0.1786, "step": 586 }, { "epoch": 0.4066505022514721, "grad_norm": 1.1976889371871948, "learning_rate": 9.5998613037448e-06, "loss": 0.1695, "step": 587 }, { "epoch": 0.40734326290266715, "grad_norm": 1.0952256917953491, "learning_rate": 9.599167822468795e-06, "loss": 0.1888, "step": 588 }, { "epoch": 0.40803602355386215, "grad_norm": 1.134501338005066, "learning_rate": 9.598474341192788e-06, "loss": 0.165, "step": 589 }, { "epoch": 0.40872878420505715, "grad_norm": 1.1301794052124023, "learning_rate": 9.597780859916783e-06, "loss": 0.1749, "step": 590 }, { "epoch": 0.40942154485625215, "grad_norm": 1.1504310369491577, "learning_rate": 9.597087378640778e-06, "loss": 0.1703, "step": 591 }, { "epoch": 0.41011430550744715, "grad_norm": 1.1471697092056274, "learning_rate": 9.596393897364773e-06, "loss": 0.1733, "step": 592 }, { "epoch": 0.4108070661586422, "grad_norm": 1.0838083028793335, "learning_rate": 9.595700416088767e-06, "loss": 0.1634, "step": 593 }, { "epoch": 0.4114998268098372, "grad_norm": 1.149314284324646, "learning_rate": 9.59500693481276e-06, "loss": 0.1681, "step": 594 }, { "epoch": 0.4121925874610322, "grad_norm": 1.072961688041687, "learning_rate": 9.594313453536756e-06, "loss": 0.1765, "step": 595 }, { "epoch": 0.4128853481122272, "grad_norm": 1.0739357471466064, "learning_rate": 9.593619972260749e-06, "loss": 0.1644, "step": 596 }, { "epoch": 0.4135781087634222, "grad_norm": 1.1856303215026855, "learning_rate": 9.592926490984744e-06, "loss": 0.1729, "step": 597 }, { "epoch": 0.41427086941461727, "grad_norm": 1.0169947147369385, "learning_rate": 9.592233009708739e-06, "loss": 0.1483, "step": 598 }, { "epoch": 0.4149636300658123, "grad_norm": 1.1429182291030884, "learning_rate": 9.591539528432732e-06, "loss": 0.1909, "step": 599 }, { "epoch": 0.4156563907170073, "grad_norm": 1.009037971496582, "learning_rate": 9.590846047156727e-06, "loss": 0.1578, "step": 600 }, { "epoch": 0.4163491513682023, "grad_norm": 1.1267091035842896, "learning_rate": 9.590152565880722e-06, "loss": 0.1718, "step": 601 }, { "epoch": 0.4170419120193973, "grad_norm": 1.2382667064666748, "learning_rate": 9.589459084604717e-06, "loss": 0.2238, "step": 602 }, { "epoch": 0.41773467267059233, "grad_norm": 1.196805715560913, "learning_rate": 9.588765603328712e-06, "loss": 0.1702, "step": 603 }, { "epoch": 0.41842743332178733, "grad_norm": 1.0227054357528687, "learning_rate": 9.588072122052705e-06, "loss": 0.1671, "step": 604 }, { "epoch": 0.41912019397298234, "grad_norm": 1.0865464210510254, "learning_rate": 9.5873786407767e-06, "loss": 0.163, "step": 605 }, { "epoch": 0.41981295462417734, "grad_norm": 1.0725364685058594, "learning_rate": 9.586685159500693e-06, "loss": 0.1627, "step": 606 }, { "epoch": 0.42050571527537234, "grad_norm": 1.1005607843399048, "learning_rate": 9.585991678224688e-06, "loss": 0.1541, "step": 607 }, { "epoch": 0.4211984759265674, "grad_norm": 1.1609758138656616, "learning_rate": 9.585298196948683e-06, "loss": 0.1646, "step": 608 }, { "epoch": 0.4218912365777624, "grad_norm": 0.9930508136749268, "learning_rate": 9.584604715672678e-06, "loss": 0.1479, "step": 609 }, { "epoch": 0.4225839972289574, "grad_norm": 1.3058476448059082, "learning_rate": 9.583911234396673e-06, "loss": 0.1913, "step": 610 }, { "epoch": 0.4232767578801524, "grad_norm": 1.258899211883545, "learning_rate": 9.583217753120666e-06, "loss": 0.2204, "step": 611 }, { "epoch": 0.4239695185313474, "grad_norm": 1.1631954908370972, "learning_rate": 9.582524271844661e-06, "loss": 0.1746, "step": 612 }, { "epoch": 0.42466227918254246, "grad_norm": 1.2395148277282715, "learning_rate": 9.581830790568656e-06, "loss": 0.1733, "step": 613 }, { "epoch": 0.42535503983373746, "grad_norm": 1.0607619285583496, "learning_rate": 9.58113730929265e-06, "loss": 0.1603, "step": 614 }, { "epoch": 0.42604780048493246, "grad_norm": 1.1310009956359863, "learning_rate": 9.580443828016644e-06, "loss": 0.1972, "step": 615 }, { "epoch": 0.42674056113612746, "grad_norm": 1.1180874109268188, "learning_rate": 9.579750346740639e-06, "loss": 0.1816, "step": 616 }, { "epoch": 0.42743332178732246, "grad_norm": 1.2065924406051636, "learning_rate": 9.579056865464634e-06, "loss": 0.2039, "step": 617 }, { "epoch": 0.4281260824385175, "grad_norm": 1.1499533653259277, "learning_rate": 9.578363384188627e-06, "loss": 0.1819, "step": 618 }, { "epoch": 0.4288188430897125, "grad_norm": 1.1326128244400024, "learning_rate": 9.577669902912622e-06, "loss": 0.1827, "step": 619 }, { "epoch": 0.4295116037409075, "grad_norm": 1.0215452909469604, "learning_rate": 9.576976421636617e-06, "loss": 0.1887, "step": 620 }, { "epoch": 0.4302043643921025, "grad_norm": 1.0503754615783691, "learning_rate": 9.57628294036061e-06, "loss": 0.1753, "step": 621 }, { "epoch": 0.4308971250432975, "grad_norm": 1.2217227220535278, "learning_rate": 9.575589459084605e-06, "loss": 0.1885, "step": 622 }, { "epoch": 0.4315898856944925, "grad_norm": 1.1326897144317627, "learning_rate": 9.5748959778086e-06, "loss": 0.2021, "step": 623 }, { "epoch": 0.4322826463456876, "grad_norm": 1.143894910812378, "learning_rate": 9.574202496532593e-06, "loss": 0.2095, "step": 624 }, { "epoch": 0.4329754069968826, "grad_norm": 1.1931277513504028, "learning_rate": 9.573509015256588e-06, "loss": 0.222, "step": 625 }, { "epoch": 0.4336681676480776, "grad_norm": 1.1747947931289673, "learning_rate": 9.572815533980583e-06, "loss": 0.217, "step": 626 }, { "epoch": 0.4343609282992726, "grad_norm": 1.1393777132034302, "learning_rate": 9.572122052704578e-06, "loss": 0.191, "step": 627 }, { "epoch": 0.4350536889504676, "grad_norm": 1.1151050329208374, "learning_rate": 9.571428571428573e-06, "loss": 0.1918, "step": 628 }, { "epoch": 0.43574644960166264, "grad_norm": 1.0819246768951416, "learning_rate": 9.570735090152566e-06, "loss": 0.1976, "step": 629 }, { "epoch": 0.43643921025285765, "grad_norm": 0.962162971496582, "learning_rate": 9.570041608876561e-06, "loss": 0.1707, "step": 630 }, { "epoch": 0.43713197090405265, "grad_norm": 1.1759436130523682, "learning_rate": 9.569348127600555e-06, "loss": 0.2099, "step": 631 }, { "epoch": 0.43782473155524765, "grad_norm": 1.060920000076294, "learning_rate": 9.56865464632455e-06, "loss": 0.161, "step": 632 }, { "epoch": 0.43851749220644265, "grad_norm": 1.1771917343139648, "learning_rate": 9.567961165048544e-06, "loss": 0.18, "step": 633 }, { "epoch": 0.4392102528576377, "grad_norm": 1.1630181074142456, "learning_rate": 9.56726768377254e-06, "loss": 0.2122, "step": 634 }, { "epoch": 0.4399030135088327, "grad_norm": 1.141360878944397, "learning_rate": 9.566574202496534e-06, "loss": 0.1777, "step": 635 }, { "epoch": 0.4405957741600277, "grad_norm": 1.2425981760025024, "learning_rate": 9.565880721220528e-06, "loss": 0.1852, "step": 636 }, { "epoch": 0.4412885348112227, "grad_norm": 1.1600677967071533, "learning_rate": 9.565187239944523e-06, "loss": 0.1837, "step": 637 }, { "epoch": 0.4419812954624177, "grad_norm": 1.2033652067184448, "learning_rate": 9.564493758668517e-06, "loss": 0.1933, "step": 638 }, { "epoch": 0.44267405611361277, "grad_norm": 1.1137166023254395, "learning_rate": 9.56380027739251e-06, "loss": 0.1688, "step": 639 }, { "epoch": 0.44336681676480777, "grad_norm": 1.1078693866729736, "learning_rate": 9.563106796116506e-06, "loss": 0.1839, "step": 640 }, { "epoch": 0.44405957741600277, "grad_norm": 1.2090137004852295, "learning_rate": 9.562413314840499e-06, "loss": 0.1989, "step": 641 }, { "epoch": 0.44475233806719777, "grad_norm": 1.0715160369873047, "learning_rate": 9.561719833564494e-06, "loss": 0.1677, "step": 642 }, { "epoch": 0.4454450987183928, "grad_norm": 1.1578370332717896, "learning_rate": 9.561026352288489e-06, "loss": 0.1691, "step": 643 }, { "epoch": 0.44613785936958783, "grad_norm": 1.098793864250183, "learning_rate": 9.560332871012484e-06, "loss": 0.145, "step": 644 }, { "epoch": 0.44683062002078283, "grad_norm": 1.1655832529067993, "learning_rate": 9.559639389736479e-06, "loss": 0.2167, "step": 645 }, { "epoch": 0.44752338067197783, "grad_norm": 1.0528911352157593, "learning_rate": 9.558945908460472e-06, "loss": 0.155, "step": 646 }, { "epoch": 0.44821614132317283, "grad_norm": 1.0747157335281372, "learning_rate": 9.558252427184467e-06, "loss": 0.1867, "step": 647 }, { "epoch": 0.44890890197436784, "grad_norm": 1.0972528457641602, "learning_rate": 9.557558945908462e-06, "loss": 0.1949, "step": 648 }, { "epoch": 0.4496016626255629, "grad_norm": 1.099705696105957, "learning_rate": 9.556865464632455e-06, "loss": 0.1808, "step": 649 }, { "epoch": 0.4502944232767579, "grad_norm": 1.1693402528762817, "learning_rate": 9.55617198335645e-06, "loss": 0.1955, "step": 650 }, { "epoch": 0.4509871839279529, "grad_norm": 1.0074596405029297, "learning_rate": 9.555478502080445e-06, "loss": 0.1664, "step": 651 }, { "epoch": 0.4516799445791479, "grad_norm": 1.0684326887130737, "learning_rate": 9.55478502080444e-06, "loss": 0.1709, "step": 652 }, { "epoch": 0.4523727052303429, "grad_norm": 1.0958446264266968, "learning_rate": 9.554091539528435e-06, "loss": 0.1665, "step": 653 }, { "epoch": 0.45306546588153795, "grad_norm": 1.1592566967010498, "learning_rate": 9.553398058252428e-06, "loss": 0.2131, "step": 654 }, { "epoch": 0.45375822653273296, "grad_norm": 1.1357659101486206, "learning_rate": 9.552704576976423e-06, "loss": 0.185, "step": 655 }, { "epoch": 0.45445098718392796, "grad_norm": 1.0651899576187134, "learning_rate": 9.552011095700416e-06, "loss": 0.1653, "step": 656 }, { "epoch": 0.45514374783512296, "grad_norm": 1.1451846361160278, "learning_rate": 9.551317614424411e-06, "loss": 0.1967, "step": 657 }, { "epoch": 0.45583650848631796, "grad_norm": 1.0786046981811523, "learning_rate": 9.550624133148406e-06, "loss": 0.196, "step": 658 }, { "epoch": 0.456529269137513, "grad_norm": 0.9809712171554565, "learning_rate": 9.5499306518724e-06, "loss": 0.1568, "step": 659 }, { "epoch": 0.457222029788708, "grad_norm": 1.2117514610290527, "learning_rate": 9.549237170596394e-06, "loss": 0.1804, "step": 660 }, { "epoch": 0.457914790439903, "grad_norm": 1.1225571632385254, "learning_rate": 9.548543689320389e-06, "loss": 0.1837, "step": 661 }, { "epoch": 0.458607551091098, "grad_norm": 1.0222355127334595, "learning_rate": 9.547850208044384e-06, "loss": 0.1809, "step": 662 }, { "epoch": 0.459300311742293, "grad_norm": 1.2313727140426636, "learning_rate": 9.547156726768379e-06, "loss": 0.2136, "step": 663 }, { "epoch": 0.459993072393488, "grad_norm": 1.2407517433166504, "learning_rate": 9.546463245492372e-06, "loss": 0.1986, "step": 664 }, { "epoch": 0.4606858330446831, "grad_norm": 1.126572847366333, "learning_rate": 9.545769764216367e-06, "loss": 0.1767, "step": 665 }, { "epoch": 0.4613785936958781, "grad_norm": 1.0602760314941406, "learning_rate": 9.54507628294036e-06, "loss": 0.1603, "step": 666 }, { "epoch": 0.4620713543470731, "grad_norm": 1.2250707149505615, "learning_rate": 9.544382801664355e-06, "loss": 0.1639, "step": 667 }, { "epoch": 0.4627641149982681, "grad_norm": 1.2366909980773926, "learning_rate": 9.54368932038835e-06, "loss": 0.2106, "step": 668 }, { "epoch": 0.4634568756494631, "grad_norm": 1.2094905376434326, "learning_rate": 9.542995839112345e-06, "loss": 0.2088, "step": 669 }, { "epoch": 0.46414963630065814, "grad_norm": 1.1645236015319824, "learning_rate": 9.54230235783634e-06, "loss": 0.17, "step": 670 }, { "epoch": 0.46484239695185314, "grad_norm": 1.1013844013214111, "learning_rate": 9.541608876560333e-06, "loss": 0.1776, "step": 671 }, { "epoch": 0.46553515760304814, "grad_norm": 1.1031912565231323, "learning_rate": 9.540915395284328e-06, "loss": 0.1666, "step": 672 }, { "epoch": 0.46622791825424315, "grad_norm": 1.1358929872512817, "learning_rate": 9.540221914008323e-06, "loss": 0.1939, "step": 673 }, { "epoch": 0.46692067890543815, "grad_norm": 1.0319671630859375, "learning_rate": 9.539528432732316e-06, "loss": 0.16, "step": 674 }, { "epoch": 0.4676134395566332, "grad_norm": 1.1813427209854126, "learning_rate": 9.538834951456311e-06, "loss": 0.1536, "step": 675 }, { "epoch": 0.4683062002078282, "grad_norm": 1.269011378288269, "learning_rate": 9.538141470180306e-06, "loss": 0.19, "step": 676 }, { "epoch": 0.4689989608590232, "grad_norm": 1.142793893814087, "learning_rate": 9.5374479889043e-06, "loss": 0.1707, "step": 677 }, { "epoch": 0.4696917215102182, "grad_norm": 1.0003671646118164, "learning_rate": 9.536754507628294e-06, "loss": 0.1606, "step": 678 }, { "epoch": 0.4703844821614132, "grad_norm": 1.093482255935669, "learning_rate": 9.53606102635229e-06, "loss": 0.1654, "step": 679 }, { "epoch": 0.47107724281260827, "grad_norm": 1.0161356925964355, "learning_rate": 9.535367545076284e-06, "loss": 0.1546, "step": 680 }, { "epoch": 0.47177000346380327, "grad_norm": 1.2691065073013306, "learning_rate": 9.534674063800278e-06, "loss": 0.1511, "step": 681 }, { "epoch": 0.47246276411499827, "grad_norm": 1.1133615970611572, "learning_rate": 9.533980582524273e-06, "loss": 0.1745, "step": 682 }, { "epoch": 0.47315552476619327, "grad_norm": 1.132333517074585, "learning_rate": 9.533287101248267e-06, "loss": 0.158, "step": 683 }, { "epoch": 0.47384828541738827, "grad_norm": 1.217966914176941, "learning_rate": 9.53259361997226e-06, "loss": 0.1904, "step": 684 }, { "epoch": 0.4745410460685833, "grad_norm": 1.2397990226745605, "learning_rate": 9.531900138696256e-06, "loss": 0.1963, "step": 685 }, { "epoch": 0.47523380671977833, "grad_norm": 1.2267277240753174, "learning_rate": 9.53120665742025e-06, "loss": 0.1831, "step": 686 }, { "epoch": 0.47592656737097333, "grad_norm": 1.0864651203155518, "learning_rate": 9.530513176144245e-06, "loss": 0.1771, "step": 687 }, { "epoch": 0.47661932802216833, "grad_norm": 1.104586124420166, "learning_rate": 9.52981969486824e-06, "loss": 0.1497, "step": 688 }, { "epoch": 0.47731208867336333, "grad_norm": 1.0452547073364258, "learning_rate": 9.529126213592234e-06, "loss": 0.1816, "step": 689 }, { "epoch": 0.4780048493245584, "grad_norm": 1.189637541770935, "learning_rate": 9.528432732316229e-06, "loss": 0.2085, "step": 690 }, { "epoch": 0.4786976099757534, "grad_norm": 0.9842264652252197, "learning_rate": 9.527739251040222e-06, "loss": 0.168, "step": 691 }, { "epoch": 0.4793903706269484, "grad_norm": 1.2393513917922974, "learning_rate": 9.527045769764217e-06, "loss": 0.1678, "step": 692 }, { "epoch": 0.4800831312781434, "grad_norm": 1.2825803756713867, "learning_rate": 9.526352288488212e-06, "loss": 0.1998, "step": 693 }, { "epoch": 0.4807758919293384, "grad_norm": 1.0857014656066895, "learning_rate": 9.525658807212207e-06, "loss": 0.1661, "step": 694 }, { "epoch": 0.48146865258053345, "grad_norm": 1.0867372751235962, "learning_rate": 9.524965325936202e-06, "loss": 0.194, "step": 695 }, { "epoch": 0.48216141323172845, "grad_norm": 1.1201382875442505, "learning_rate": 9.524271844660195e-06, "loss": 0.1708, "step": 696 }, { "epoch": 0.48285417388292345, "grad_norm": 1.0329430103302002, "learning_rate": 9.52357836338419e-06, "loss": 0.1904, "step": 697 }, { "epoch": 0.48354693453411846, "grad_norm": 1.0161997079849243, "learning_rate": 9.522884882108185e-06, "loss": 0.1558, "step": 698 }, { "epoch": 0.48423969518531346, "grad_norm": 1.0819106101989746, "learning_rate": 9.522191400832178e-06, "loss": 0.1486, "step": 699 }, { "epoch": 0.4849324558365085, "grad_norm": 1.1887950897216797, "learning_rate": 9.521497919556173e-06, "loss": 0.1777, "step": 700 }, { "epoch": 0.4856252164877035, "grad_norm": 1.1242791414260864, "learning_rate": 9.520804438280166e-06, "loss": 0.1696, "step": 701 }, { "epoch": 0.4863179771388985, "grad_norm": 1.224696397781372, "learning_rate": 9.520110957004161e-06, "loss": 0.2119, "step": 702 }, { "epoch": 0.4870107377900935, "grad_norm": 1.1201990842819214, "learning_rate": 9.519417475728156e-06, "loss": 0.1641, "step": 703 }, { "epoch": 0.4877034984412885, "grad_norm": 1.097137689590454, "learning_rate": 9.518723994452151e-06, "loss": 0.1775, "step": 704 }, { "epoch": 0.4883962590924835, "grad_norm": 1.062699556350708, "learning_rate": 9.518030513176146e-06, "loss": 0.1501, "step": 705 }, { "epoch": 0.4890890197436786, "grad_norm": 1.1051220893859863, "learning_rate": 9.517337031900139e-06, "loss": 0.1586, "step": 706 }, { "epoch": 0.4897817803948736, "grad_norm": 1.1447747945785522, "learning_rate": 9.516643550624134e-06, "loss": 0.1961, "step": 707 }, { "epoch": 0.4904745410460686, "grad_norm": 1.0780168771743774, "learning_rate": 9.515950069348129e-06, "loss": 0.1665, "step": 708 }, { "epoch": 0.4911673016972636, "grad_norm": 1.154279112815857, "learning_rate": 9.515256588072122e-06, "loss": 0.1913, "step": 709 }, { "epoch": 0.4918600623484586, "grad_norm": 1.137214183807373, "learning_rate": 9.514563106796117e-06, "loss": 0.1734, "step": 710 }, { "epoch": 0.49255282299965364, "grad_norm": 1.1921672821044922, "learning_rate": 9.513869625520112e-06, "loss": 0.1781, "step": 711 }, { "epoch": 0.49324558365084864, "grad_norm": 1.0072582960128784, "learning_rate": 9.513176144244107e-06, "loss": 0.1362, "step": 712 }, { "epoch": 0.49393834430204364, "grad_norm": 1.1574749946594238, "learning_rate": 9.512482662968102e-06, "loss": 0.1693, "step": 713 }, { "epoch": 0.49463110495323864, "grad_norm": 1.2283178567886353, "learning_rate": 9.511789181692095e-06, "loss": 0.1753, "step": 714 }, { "epoch": 0.49532386560443364, "grad_norm": 0.8855034708976746, "learning_rate": 9.51109570041609e-06, "loss": 0.131, "step": 715 }, { "epoch": 0.4960166262556287, "grad_norm": 1.0271499156951904, "learning_rate": 9.510402219140083e-06, "loss": 0.1416, "step": 716 }, { "epoch": 0.4967093869068237, "grad_norm": 1.055769920349121, "learning_rate": 9.509708737864078e-06, "loss": 0.1802, "step": 717 }, { "epoch": 0.4974021475580187, "grad_norm": 1.1768474578857422, "learning_rate": 9.509015256588073e-06, "loss": 0.1647, "step": 718 }, { "epoch": 0.4980949082092137, "grad_norm": 1.1791810989379883, "learning_rate": 9.508321775312066e-06, "loss": 0.216, "step": 719 }, { "epoch": 0.4987876688604087, "grad_norm": 1.0952759981155396, "learning_rate": 9.507628294036061e-06, "loss": 0.1976, "step": 720 }, { "epoch": 0.49948042951160376, "grad_norm": 1.055741548538208, "learning_rate": 9.506934812760056e-06, "loss": 0.1744, "step": 721 }, { "epoch": 0.5001731901627987, "grad_norm": 1.1406736373901367, "learning_rate": 9.506241331484051e-06, "loss": 0.1761, "step": 722 }, { "epoch": 0.5008659508139938, "grad_norm": 1.1879351139068604, "learning_rate": 9.505547850208046e-06, "loss": 0.1842, "step": 723 }, { "epoch": 0.5015587114651888, "grad_norm": 1.1181124448776245, "learning_rate": 9.50485436893204e-06, "loss": 0.1799, "step": 724 }, { "epoch": 0.5022514721163838, "grad_norm": 1.1482161283493042, "learning_rate": 9.504160887656034e-06, "loss": 0.1711, "step": 725 }, { "epoch": 0.5029442327675788, "grad_norm": 1.0373079776763916, "learning_rate": 9.503467406380028e-06, "loss": 0.1639, "step": 726 }, { "epoch": 0.5036369934187738, "grad_norm": 0.9980860948562622, "learning_rate": 9.502773925104022e-06, "loss": 0.1604, "step": 727 }, { "epoch": 0.5043297540699688, "grad_norm": 1.0549565553665161, "learning_rate": 9.502080443828017e-06, "loss": 0.1876, "step": 728 }, { "epoch": 0.5050225147211639, "grad_norm": 1.1054435968399048, "learning_rate": 9.501386962552012e-06, "loss": 0.1753, "step": 729 }, { "epoch": 0.5057152753723588, "grad_norm": 1.103574514389038, "learning_rate": 9.500693481276007e-06, "loss": 0.1581, "step": 730 }, { "epoch": 0.5064080360235539, "grad_norm": 1.071014165878296, "learning_rate": 9.5e-06, "loss": 0.1746, "step": 731 }, { "epoch": 0.5071007966747488, "grad_norm": 1.1004180908203125, "learning_rate": 9.499306518723995e-06, "loss": 0.1914, "step": 732 }, { "epoch": 0.5077935573259439, "grad_norm": 1.2091470956802368, "learning_rate": 9.49861303744799e-06, "loss": 0.1894, "step": 733 }, { "epoch": 0.508486317977139, "grad_norm": 1.163714051246643, "learning_rate": 9.497919556171984e-06, "loss": 0.1919, "step": 734 }, { "epoch": 0.5091790786283339, "grad_norm": 1.2377830743789673, "learning_rate": 9.497226074895979e-06, "loss": 0.1979, "step": 735 }, { "epoch": 0.509871839279529, "grad_norm": 1.0097078084945679, "learning_rate": 9.496532593619972e-06, "loss": 0.1594, "step": 736 }, { "epoch": 0.5105645999307239, "grad_norm": 1.0955952405929565, "learning_rate": 9.495839112343967e-06, "loss": 0.1671, "step": 737 }, { "epoch": 0.511257360581919, "grad_norm": 1.084110975265503, "learning_rate": 9.495145631067962e-06, "loss": 0.1764, "step": 738 }, { "epoch": 0.511950121233114, "grad_norm": 1.0516014099121094, "learning_rate": 9.494452149791957e-06, "loss": 0.1606, "step": 739 }, { "epoch": 0.512642881884309, "grad_norm": 1.0338329076766968, "learning_rate": 9.493758668515952e-06, "loss": 0.1611, "step": 740 }, { "epoch": 0.513335642535504, "grad_norm": 1.1432766914367676, "learning_rate": 9.493065187239945e-06, "loss": 0.1695, "step": 741 }, { "epoch": 0.514028403186699, "grad_norm": 1.0821613073349, "learning_rate": 9.49237170596394e-06, "loss": 0.1746, "step": 742 }, { "epoch": 0.514721163837894, "grad_norm": 1.1612175703048706, "learning_rate": 9.491678224687935e-06, "loss": 0.1675, "step": 743 }, { "epoch": 0.5154139244890891, "grad_norm": 1.1131502389907837, "learning_rate": 9.490984743411928e-06, "loss": 0.1655, "step": 744 }, { "epoch": 0.516106685140284, "grad_norm": 1.0816477537155151, "learning_rate": 9.490291262135923e-06, "loss": 0.1672, "step": 745 }, { "epoch": 0.5167994457914791, "grad_norm": 1.1418256759643555, "learning_rate": 9.489597780859918e-06, "loss": 0.1934, "step": 746 }, { "epoch": 0.517492206442674, "grad_norm": 1.1041661500930786, "learning_rate": 9.488904299583913e-06, "loss": 0.1733, "step": 747 }, { "epoch": 0.5181849670938691, "grad_norm": 1.169494867324829, "learning_rate": 9.488210818307908e-06, "loss": 0.1676, "step": 748 }, { "epoch": 0.5188777277450641, "grad_norm": 1.0808500051498413, "learning_rate": 9.487517337031901e-06, "loss": 0.1641, "step": 749 }, { "epoch": 0.5195704883962591, "grad_norm": 1.0408234596252441, "learning_rate": 9.486823855755896e-06, "loss": 0.1712, "step": 750 }, { "epoch": 0.5202632490474541, "grad_norm": 1.096534252166748, "learning_rate": 9.486130374479889e-06, "loss": 0.1697, "step": 751 }, { "epoch": 0.5209560096986491, "grad_norm": 1.1266546249389648, "learning_rate": 9.485436893203884e-06, "loss": 0.1733, "step": 752 }, { "epoch": 0.5216487703498441, "grad_norm": 1.0809706449508667, "learning_rate": 9.484743411927879e-06, "loss": 0.1488, "step": 753 }, { "epoch": 0.5223415310010392, "grad_norm": 1.0462276935577393, "learning_rate": 9.484049930651872e-06, "loss": 0.1563, "step": 754 }, { "epoch": 0.5230342916522341, "grad_norm": 0.9936279654502869, "learning_rate": 9.483356449375867e-06, "loss": 0.1465, "step": 755 }, { "epoch": 0.5237270523034292, "grad_norm": 1.135073184967041, "learning_rate": 9.482662968099862e-06, "loss": 0.2139, "step": 756 }, { "epoch": 0.5244198129546241, "grad_norm": 1.115220069885254, "learning_rate": 9.481969486823857e-06, "loss": 0.1782, "step": 757 }, { "epoch": 0.5251125736058192, "grad_norm": 1.0991733074188232, "learning_rate": 9.481276005547852e-06, "loss": 0.1593, "step": 758 }, { "epoch": 0.5258053342570143, "grad_norm": 1.1360588073730469, "learning_rate": 9.480582524271845e-06, "loss": 0.1931, "step": 759 }, { "epoch": 0.5264980949082092, "grad_norm": 1.1279994249343872, "learning_rate": 9.47988904299584e-06, "loss": 0.1854, "step": 760 }, { "epoch": 0.5271908555594043, "grad_norm": 1.1006906032562256, "learning_rate": 9.479195561719833e-06, "loss": 0.1527, "step": 761 }, { "epoch": 0.5278836162105992, "grad_norm": 1.2920564413070679, "learning_rate": 9.478502080443828e-06, "loss": 0.2194, "step": 762 }, { "epoch": 0.5285763768617943, "grad_norm": 1.049129605293274, "learning_rate": 9.477808599167823e-06, "loss": 0.1614, "step": 763 }, { "epoch": 0.5292691375129892, "grad_norm": 1.2203729152679443, "learning_rate": 9.477115117891818e-06, "loss": 0.1753, "step": 764 }, { "epoch": 0.5299618981641843, "grad_norm": 1.2329607009887695, "learning_rate": 9.476421636615813e-06, "loss": 0.1734, "step": 765 }, { "epoch": 0.5306546588153793, "grad_norm": 1.0419095754623413, "learning_rate": 9.475728155339806e-06, "loss": 0.1663, "step": 766 }, { "epoch": 0.5313474194665743, "grad_norm": 1.1187750101089478, "learning_rate": 9.475034674063801e-06, "loss": 0.1609, "step": 767 }, { "epoch": 0.5320401801177693, "grad_norm": 0.9896152019500732, "learning_rate": 9.474341192787796e-06, "loss": 0.1529, "step": 768 }, { "epoch": 0.5327329407689643, "grad_norm": 1.0580388307571411, "learning_rate": 9.47364771151179e-06, "loss": 0.177, "step": 769 }, { "epoch": 0.5334257014201593, "grad_norm": 1.0653841495513916, "learning_rate": 9.472954230235784e-06, "loss": 0.1877, "step": 770 }, { "epoch": 0.5341184620713544, "grad_norm": 1.161211609840393, "learning_rate": 9.47226074895978e-06, "loss": 0.1858, "step": 771 }, { "epoch": 0.5348112227225493, "grad_norm": 1.0894384384155273, "learning_rate": 9.471567267683774e-06, "loss": 0.2039, "step": 772 }, { "epoch": 0.5355039833737444, "grad_norm": 1.0264865159988403, "learning_rate": 9.470873786407767e-06, "loss": 0.1519, "step": 773 }, { "epoch": 0.5361967440249393, "grad_norm": 1.2336781024932861, "learning_rate": 9.470180305131762e-06, "loss": 0.1956, "step": 774 }, { "epoch": 0.5368895046761344, "grad_norm": 1.064982533454895, "learning_rate": 9.469486823855757e-06, "loss": 0.1591, "step": 775 }, { "epoch": 0.5375822653273294, "grad_norm": 1.0894606113433838, "learning_rate": 9.46879334257975e-06, "loss": 0.1708, "step": 776 }, { "epoch": 0.5382750259785244, "grad_norm": 0.9786089062690735, "learning_rate": 9.468099861303745e-06, "loss": 0.1295, "step": 777 }, { "epoch": 0.5389677866297194, "grad_norm": 1.1241366863250732, "learning_rate": 9.46740638002774e-06, "loss": 0.1713, "step": 778 }, { "epoch": 0.5396605472809144, "grad_norm": 1.055454969406128, "learning_rate": 9.466712898751734e-06, "loss": 0.1754, "step": 779 }, { "epoch": 0.5403533079321095, "grad_norm": 1.2188910245895386, "learning_rate": 9.466019417475729e-06, "loss": 0.2029, "step": 780 }, { "epoch": 0.5410460685833045, "grad_norm": 0.9836776852607727, "learning_rate": 9.465325936199723e-06, "loss": 0.1491, "step": 781 }, { "epoch": 0.5417388292344995, "grad_norm": 1.0582470893859863, "learning_rate": 9.464632454923718e-06, "loss": 0.1693, "step": 782 }, { "epoch": 0.5424315898856945, "grad_norm": 1.2043572664260864, "learning_rate": 9.463938973647713e-06, "loss": 0.1891, "step": 783 }, { "epoch": 0.5431243505368895, "grad_norm": 1.095377802848816, "learning_rate": 9.463245492371707e-06, "loss": 0.1809, "step": 784 }, { "epoch": 0.5438171111880845, "grad_norm": 1.0977164506912231, "learning_rate": 9.462552011095702e-06, "loss": 0.1703, "step": 785 }, { "epoch": 0.5445098718392796, "grad_norm": 1.1873396635055542, "learning_rate": 9.461858529819695e-06, "loss": 0.142, "step": 786 }, { "epoch": 0.5452026324904745, "grad_norm": 1.0897413492202759, "learning_rate": 9.46116504854369e-06, "loss": 0.1657, "step": 787 }, { "epoch": 0.5458953931416696, "grad_norm": 1.0887234210968018, "learning_rate": 9.460471567267685e-06, "loss": 0.1708, "step": 788 }, { "epoch": 0.5465881537928645, "grad_norm": 1.056678056716919, "learning_rate": 9.45977808599168e-06, "loss": 0.1517, "step": 789 }, { "epoch": 0.5472809144440596, "grad_norm": 1.0199482440948486, "learning_rate": 9.459084604715674e-06, "loss": 0.1447, "step": 790 }, { "epoch": 0.5479736750952546, "grad_norm": 1.1114829778671265, "learning_rate": 9.458391123439668e-06, "loss": 0.1788, "step": 791 }, { "epoch": 0.5486664357464496, "grad_norm": 1.0675311088562012, "learning_rate": 9.457697642163663e-06, "loss": 0.1678, "step": 792 }, { "epoch": 0.5493591963976446, "grad_norm": 1.1933588981628418, "learning_rate": 9.457004160887658e-06, "loss": 0.1897, "step": 793 }, { "epoch": 0.5500519570488396, "grad_norm": 0.9565851092338562, "learning_rate": 9.45631067961165e-06, "loss": 0.1556, "step": 794 }, { "epoch": 0.5507447177000346, "grad_norm": 1.0294686555862427, "learning_rate": 9.455617198335646e-06, "loss": 0.1564, "step": 795 }, { "epoch": 0.5514374783512297, "grad_norm": 1.1544939279556274, "learning_rate": 9.454923717059639e-06, "loss": 0.1808, "step": 796 }, { "epoch": 0.5521302390024246, "grad_norm": 1.0158432722091675, "learning_rate": 9.454230235783634e-06, "loss": 0.1758, "step": 797 }, { "epoch": 0.5528229996536197, "grad_norm": 1.0488780736923218, "learning_rate": 9.453536754507629e-06, "loss": 0.1514, "step": 798 }, { "epoch": 0.5535157603048146, "grad_norm": 1.0166054964065552, "learning_rate": 9.452843273231624e-06, "loss": 0.1699, "step": 799 }, { "epoch": 0.5542085209560097, "grad_norm": 1.273810863494873, "learning_rate": 9.452149791955619e-06, "loss": 0.1872, "step": 800 }, { "epoch": 0.5549012816072048, "grad_norm": 1.1096241474151611, "learning_rate": 9.451456310679612e-06, "loss": 0.2005, "step": 801 }, { "epoch": 0.5555940422583997, "grad_norm": 0.938686192035675, "learning_rate": 9.450762829403607e-06, "loss": 0.1365, "step": 802 }, { "epoch": 0.5562868029095948, "grad_norm": 1.1645365953445435, "learning_rate": 9.450069348127602e-06, "loss": 0.1654, "step": 803 }, { "epoch": 0.5569795635607897, "grad_norm": 1.0550135374069214, "learning_rate": 9.449375866851595e-06, "loss": 0.147, "step": 804 }, { "epoch": 0.5576723242119848, "grad_norm": 1.1085137128829956, "learning_rate": 9.44868238557559e-06, "loss": 0.17, "step": 805 }, { "epoch": 0.5583650848631798, "grad_norm": 1.0627312660217285, "learning_rate": 9.447988904299585e-06, "loss": 0.166, "step": 806 }, { "epoch": 0.5590578455143748, "grad_norm": 1.2298170328140259, "learning_rate": 9.44729542302358e-06, "loss": 0.1824, "step": 807 }, { "epoch": 0.5597506061655698, "grad_norm": 1.083337426185608, "learning_rate": 9.446601941747575e-06, "loss": 0.1601, "step": 808 }, { "epoch": 0.5604433668167648, "grad_norm": 1.0384013652801514, "learning_rate": 9.445908460471568e-06, "loss": 0.159, "step": 809 }, { "epoch": 0.5611361274679598, "grad_norm": 1.178229808807373, "learning_rate": 9.445214979195563e-06, "loss": 0.1669, "step": 810 }, { "epoch": 0.5618288881191549, "grad_norm": 1.1300045251846313, "learning_rate": 9.444521497919556e-06, "loss": 0.1729, "step": 811 }, { "epoch": 0.5625216487703498, "grad_norm": 0.940787672996521, "learning_rate": 9.443828016643551e-06, "loss": 0.1478, "step": 812 }, { "epoch": 0.5632144094215449, "grad_norm": 1.126418948173523, "learning_rate": 9.443134535367546e-06, "loss": 0.1962, "step": 813 }, { "epoch": 0.5639071700727398, "grad_norm": 1.0317989587783813, "learning_rate": 9.44244105409154e-06, "loss": 0.1548, "step": 814 }, { "epoch": 0.5645999307239349, "grad_norm": 1.2981680631637573, "learning_rate": 9.441747572815534e-06, "loss": 0.1905, "step": 815 }, { "epoch": 0.5652926913751299, "grad_norm": 0.9974861741065979, "learning_rate": 9.44105409153953e-06, "loss": 0.1447, "step": 816 }, { "epoch": 0.5659854520263249, "grad_norm": 1.0251084566116333, "learning_rate": 9.440360610263524e-06, "loss": 0.146, "step": 817 }, { "epoch": 0.5666782126775199, "grad_norm": 0.9431449174880981, "learning_rate": 9.439667128987519e-06, "loss": 0.1516, "step": 818 }, { "epoch": 0.5673709733287149, "grad_norm": 1.076874852180481, "learning_rate": 9.438973647711512e-06, "loss": 0.1747, "step": 819 }, { "epoch": 0.56806373397991, "grad_norm": 1.1013811826705933, "learning_rate": 9.438280166435507e-06, "loss": 0.1674, "step": 820 }, { "epoch": 0.568756494631105, "grad_norm": 1.1640971899032593, "learning_rate": 9.4375866851595e-06, "loss": 0.1457, "step": 821 }, { "epoch": 0.5694492552823, "grad_norm": 1.1467467546463013, "learning_rate": 9.436893203883495e-06, "loss": 0.1752, "step": 822 }, { "epoch": 0.570142015933495, "grad_norm": 1.1216028928756714, "learning_rate": 9.43619972260749e-06, "loss": 0.169, "step": 823 }, { "epoch": 0.57083477658469, "grad_norm": 1.068642020225525, "learning_rate": 9.435506241331485e-06, "loss": 0.1912, "step": 824 }, { "epoch": 0.571527537235885, "grad_norm": 1.0372047424316406, "learning_rate": 9.43481276005548e-06, "loss": 0.1722, "step": 825 }, { "epoch": 0.5722202978870801, "grad_norm": 1.0227689743041992, "learning_rate": 9.434119278779473e-06, "loss": 0.1651, "step": 826 }, { "epoch": 0.572913058538275, "grad_norm": 0.9313246607780457, "learning_rate": 9.433425797503468e-06, "loss": 0.1704, "step": 827 }, { "epoch": 0.5736058191894701, "grad_norm": 0.9683116674423218, "learning_rate": 9.432732316227463e-06, "loss": 0.1536, "step": 828 }, { "epoch": 0.574298579840665, "grad_norm": 1.049546241760254, "learning_rate": 9.432038834951457e-06, "loss": 0.1934, "step": 829 }, { "epoch": 0.5749913404918601, "grad_norm": 1.0880478620529175, "learning_rate": 9.431345353675451e-06, "loss": 0.167, "step": 830 }, { "epoch": 0.5756841011430551, "grad_norm": 0.9867199659347534, "learning_rate": 9.430651872399445e-06, "loss": 0.1683, "step": 831 }, { "epoch": 0.5763768617942501, "grad_norm": 1.1325767040252686, "learning_rate": 9.42995839112344e-06, "loss": 0.1769, "step": 832 }, { "epoch": 0.5770696224454451, "grad_norm": 1.094765543937683, "learning_rate": 9.429264909847435e-06, "loss": 0.1488, "step": 833 }, { "epoch": 0.5777623830966401, "grad_norm": 1.150747537612915, "learning_rate": 9.42857142857143e-06, "loss": 0.1855, "step": 834 }, { "epoch": 0.5784551437478351, "grad_norm": 1.2596840858459473, "learning_rate": 9.427877947295424e-06, "loss": 0.1901, "step": 835 }, { "epoch": 0.5791479043990302, "grad_norm": 1.0859004259109497, "learning_rate": 9.427184466019418e-06, "loss": 0.1711, "step": 836 }, { "epoch": 0.5798406650502251, "grad_norm": 1.0970268249511719, "learning_rate": 9.426490984743413e-06, "loss": 0.1829, "step": 837 }, { "epoch": 0.5805334257014202, "grad_norm": 0.987734317779541, "learning_rate": 9.425797503467408e-06, "loss": 0.1337, "step": 838 }, { "epoch": 0.5812261863526151, "grad_norm": 0.9403496980667114, "learning_rate": 9.4251040221914e-06, "loss": 0.1426, "step": 839 }, { "epoch": 0.5819189470038102, "grad_norm": 1.0452890396118164, "learning_rate": 9.424410540915396e-06, "loss": 0.1731, "step": 840 }, { "epoch": 0.5826117076550053, "grad_norm": 1.207574486732483, "learning_rate": 9.42371705963939e-06, "loss": 0.1846, "step": 841 }, { "epoch": 0.5833044683062002, "grad_norm": 1.1449249982833862, "learning_rate": 9.423023578363386e-06, "loss": 0.1648, "step": 842 }, { "epoch": 0.5839972289573953, "grad_norm": 1.1291990280151367, "learning_rate": 9.42233009708738e-06, "loss": 0.1725, "step": 843 }, { "epoch": 0.5846899896085902, "grad_norm": 1.0940741300582886, "learning_rate": 9.421636615811374e-06, "loss": 0.1713, "step": 844 }, { "epoch": 0.5853827502597853, "grad_norm": 1.090114712715149, "learning_rate": 9.420943134535369e-06, "loss": 0.1899, "step": 845 }, { "epoch": 0.5860755109109802, "grad_norm": 1.1002274751663208, "learning_rate": 9.420249653259362e-06, "loss": 0.1623, "step": 846 }, { "epoch": 0.5867682715621753, "grad_norm": 1.200971245765686, "learning_rate": 9.419556171983357e-06, "loss": 0.1813, "step": 847 }, { "epoch": 0.5874610322133703, "grad_norm": 1.0063960552215576, "learning_rate": 9.418862690707352e-06, "loss": 0.1723, "step": 848 }, { "epoch": 0.5881537928645653, "grad_norm": 1.2347016334533691, "learning_rate": 9.418169209431347e-06, "loss": 0.1865, "step": 849 }, { "epoch": 0.5888465535157603, "grad_norm": 1.1809277534484863, "learning_rate": 9.41747572815534e-06, "loss": 0.1778, "step": 850 }, { "epoch": 0.5895393141669553, "grad_norm": 0.9739560484886169, "learning_rate": 9.416782246879335e-06, "loss": 0.1391, "step": 851 }, { "epoch": 0.5902320748181503, "grad_norm": 1.0778708457946777, "learning_rate": 9.41608876560333e-06, "loss": 0.1648, "step": 852 }, { "epoch": 0.5909248354693454, "grad_norm": 1.2798731327056885, "learning_rate": 9.415395284327325e-06, "loss": 0.1685, "step": 853 }, { "epoch": 0.5916175961205403, "grad_norm": 1.5980770587921143, "learning_rate": 9.414701803051318e-06, "loss": 0.1503, "step": 854 }, { "epoch": 0.5923103567717354, "grad_norm": 1.0819963216781616, "learning_rate": 9.414008321775313e-06, "loss": 0.1435, "step": 855 }, { "epoch": 0.5930031174229303, "grad_norm": 1.1285098791122437, "learning_rate": 9.413314840499306e-06, "loss": 0.171, "step": 856 }, { "epoch": 0.5936958780741254, "grad_norm": 0.9913773536682129, "learning_rate": 9.412621359223301e-06, "loss": 0.1453, "step": 857 }, { "epoch": 0.5943886387253204, "grad_norm": 1.1429187059402466, "learning_rate": 9.411927877947296e-06, "loss": 0.1559, "step": 858 }, { "epoch": 0.5950813993765154, "grad_norm": 1.103598952293396, "learning_rate": 9.411234396671291e-06, "loss": 0.1334, "step": 859 }, { "epoch": 0.5957741600277104, "grad_norm": 1.1337062120437622, "learning_rate": 9.410540915395286e-06, "loss": 0.1663, "step": 860 }, { "epoch": 0.5964669206789054, "grad_norm": 1.2121385335922241, "learning_rate": 9.40984743411928e-06, "loss": 0.1767, "step": 861 }, { "epoch": 0.5971596813301004, "grad_norm": 1.0137088298797607, "learning_rate": 9.409153952843274e-06, "loss": 0.1552, "step": 862 }, { "epoch": 0.5978524419812955, "grad_norm": 1.0943708419799805, "learning_rate": 9.408460471567269e-06, "loss": 0.1593, "step": 863 }, { "epoch": 0.5985452026324904, "grad_norm": 1.1034287214279175, "learning_rate": 9.407766990291262e-06, "loss": 0.1607, "step": 864 }, { "epoch": 0.5992379632836855, "grad_norm": 1.1569900512695312, "learning_rate": 9.407073509015257e-06, "loss": 0.1645, "step": 865 }, { "epoch": 0.5999307239348804, "grad_norm": 1.1737624406814575, "learning_rate": 9.406380027739252e-06, "loss": 0.1903, "step": 866 }, { "epoch": 0.6006234845860755, "grad_norm": 0.9703179001808167, "learning_rate": 9.405686546463247e-06, "loss": 0.1402, "step": 867 }, { "epoch": 0.6013162452372706, "grad_norm": 1.1867642402648926, "learning_rate": 9.404993065187242e-06, "loss": 0.1722, "step": 868 }, { "epoch": 0.6020090058884655, "grad_norm": 1.0768351554870605, "learning_rate": 9.404299583911235e-06, "loss": 0.1674, "step": 869 }, { "epoch": 0.6027017665396606, "grad_norm": 1.1360597610473633, "learning_rate": 9.40360610263523e-06, "loss": 0.1669, "step": 870 }, { "epoch": 0.6033945271908555, "grad_norm": 1.0119037628173828, "learning_rate": 9.402912621359223e-06, "loss": 0.16, "step": 871 }, { "epoch": 0.6040872878420506, "grad_norm": 1.1194970607757568, "learning_rate": 9.402219140083218e-06, "loss": 0.1936, "step": 872 }, { "epoch": 0.6047800484932456, "grad_norm": 1.0727914571762085, "learning_rate": 9.401525658807213e-06, "loss": 0.152, "step": 873 }, { "epoch": 0.6054728091444406, "grad_norm": 1.087863564491272, "learning_rate": 9.400832177531207e-06, "loss": 0.15, "step": 874 }, { "epoch": 0.6061655697956356, "grad_norm": 0.9721863269805908, "learning_rate": 9.400138696255201e-06, "loss": 0.1494, "step": 875 }, { "epoch": 0.6068583304468306, "grad_norm": 1.1647229194641113, "learning_rate": 9.399445214979196e-06, "loss": 0.1827, "step": 876 }, { "epoch": 0.6075510910980256, "grad_norm": 1.0240576267242432, "learning_rate": 9.398751733703191e-06, "loss": 0.157, "step": 877 }, { "epoch": 0.6082438517492207, "grad_norm": 1.128947377204895, "learning_rate": 9.398058252427186e-06, "loss": 0.171, "step": 878 }, { "epoch": 0.6089366124004156, "grad_norm": 1.165592908859253, "learning_rate": 9.39736477115118e-06, "loss": 0.1586, "step": 879 }, { "epoch": 0.6096293730516107, "grad_norm": 1.0030488967895508, "learning_rate": 9.396671289875174e-06, "loss": 0.1414, "step": 880 }, { "epoch": 0.6103221337028056, "grad_norm": 1.1792995929718018, "learning_rate": 9.395977808599168e-06, "loss": 0.2094, "step": 881 }, { "epoch": 0.6110148943540007, "grad_norm": 1.0264792442321777, "learning_rate": 9.395284327323163e-06, "loss": 0.1515, "step": 882 }, { "epoch": 0.6117076550051957, "grad_norm": 1.0973026752471924, "learning_rate": 9.394590846047158e-06, "loss": 0.1903, "step": 883 }, { "epoch": 0.6124004156563907, "grad_norm": 1.005173921585083, "learning_rate": 9.393897364771152e-06, "loss": 0.1404, "step": 884 }, { "epoch": 0.6130931763075858, "grad_norm": 1.0814557075500488, "learning_rate": 9.393203883495147e-06, "loss": 0.2041, "step": 885 }, { "epoch": 0.6137859369587807, "grad_norm": 1.2196296453475952, "learning_rate": 9.39251040221914e-06, "loss": 0.1873, "step": 886 }, { "epoch": 0.6144786976099758, "grad_norm": 0.9337116479873657, "learning_rate": 9.391816920943136e-06, "loss": 0.1468, "step": 887 }, { "epoch": 0.6151714582611708, "grad_norm": 0.9686228036880493, "learning_rate": 9.39112343966713e-06, "loss": 0.1357, "step": 888 }, { "epoch": 0.6158642189123658, "grad_norm": 1.1271620988845825, "learning_rate": 9.390429958391124e-06, "loss": 0.1891, "step": 889 }, { "epoch": 0.6165569795635608, "grad_norm": 1.0006288290023804, "learning_rate": 9.389736477115119e-06, "loss": 0.1632, "step": 890 }, { "epoch": 0.6172497402147558, "grad_norm": 1.1276105642318726, "learning_rate": 9.389042995839112e-06, "loss": 0.1847, "step": 891 }, { "epoch": 0.6179425008659508, "grad_norm": 0.9748163223266602, "learning_rate": 9.388349514563107e-06, "loss": 0.1561, "step": 892 }, { "epoch": 0.6186352615171459, "grad_norm": 1.104117751121521, "learning_rate": 9.387656033287102e-06, "loss": 0.184, "step": 893 }, { "epoch": 0.6193280221683408, "grad_norm": 0.9743916392326355, "learning_rate": 9.386962552011097e-06, "loss": 0.1528, "step": 894 }, { "epoch": 0.6200207828195359, "grad_norm": 1.0051432847976685, "learning_rate": 9.386269070735092e-06, "loss": 0.1343, "step": 895 }, { "epoch": 0.6207135434707308, "grad_norm": 1.1347147226333618, "learning_rate": 9.385575589459085e-06, "loss": 0.1689, "step": 896 }, { "epoch": 0.6214063041219259, "grad_norm": 1.051589012145996, "learning_rate": 9.38488210818308e-06, "loss": 0.1561, "step": 897 }, { "epoch": 0.6220990647731209, "grad_norm": 1.0692572593688965, "learning_rate": 9.384188626907075e-06, "loss": 0.1738, "step": 898 }, { "epoch": 0.6227918254243159, "grad_norm": 0.9588863253593445, "learning_rate": 9.383495145631068e-06, "loss": 0.1536, "step": 899 }, { "epoch": 0.6234845860755109, "grad_norm": 0.991447389125824, "learning_rate": 9.382801664355063e-06, "loss": 0.1435, "step": 900 }, { "epoch": 0.6241773467267059, "grad_norm": 1.0007715225219727, "learning_rate": 9.382108183079058e-06, "loss": 0.1481, "step": 901 }, { "epoch": 0.6248701073779009, "grad_norm": 1.085169792175293, "learning_rate": 9.381414701803053e-06, "loss": 0.1542, "step": 902 }, { "epoch": 0.625562868029096, "grad_norm": 0.9361264109611511, "learning_rate": 9.380721220527048e-06, "loss": 0.1316, "step": 903 }, { "epoch": 0.6262556286802909, "grad_norm": 1.097924828529358, "learning_rate": 9.380027739251041e-06, "loss": 0.1684, "step": 904 }, { "epoch": 0.626948389331486, "grad_norm": 1.149580717086792, "learning_rate": 9.379334257975036e-06, "loss": 0.1706, "step": 905 }, { "epoch": 0.627641149982681, "grad_norm": 1.1573041677474976, "learning_rate": 9.37864077669903e-06, "loss": 0.1787, "step": 906 }, { "epoch": 0.628333910633876, "grad_norm": 1.0068886280059814, "learning_rate": 9.377947295423024e-06, "loss": 0.1437, "step": 907 }, { "epoch": 0.6290266712850711, "grad_norm": 1.1588560342788696, "learning_rate": 9.377253814147019e-06, "loss": 0.1749, "step": 908 }, { "epoch": 0.629719431936266, "grad_norm": 1.0714843273162842, "learning_rate": 9.376560332871012e-06, "loss": 0.1578, "step": 909 }, { "epoch": 0.6304121925874611, "grad_norm": 1.195635199546814, "learning_rate": 9.375866851595007e-06, "loss": 0.181, "step": 910 }, { "epoch": 0.631104953238656, "grad_norm": 1.0656758546829224, "learning_rate": 9.375173370319002e-06, "loss": 0.1799, "step": 911 }, { "epoch": 0.6317977138898511, "grad_norm": 0.9909989833831787, "learning_rate": 9.374479889042997e-06, "loss": 0.1494, "step": 912 }, { "epoch": 0.6324904745410461, "grad_norm": 1.0120370388031006, "learning_rate": 9.373786407766992e-06, "loss": 0.1609, "step": 913 }, { "epoch": 0.6331832351922411, "grad_norm": 1.131453514099121, "learning_rate": 9.373092926490985e-06, "loss": 0.1554, "step": 914 }, { "epoch": 0.6338759958434361, "grad_norm": 1.067887783050537, "learning_rate": 9.37239944521498e-06, "loss": 0.1474, "step": 915 }, { "epoch": 0.6345687564946311, "grad_norm": 1.0186927318572998, "learning_rate": 9.371705963938973e-06, "loss": 0.1753, "step": 916 }, { "epoch": 0.6352615171458261, "grad_norm": 1.1581684350967407, "learning_rate": 9.371012482662968e-06, "loss": 0.1716, "step": 917 }, { "epoch": 0.6359542777970212, "grad_norm": 1.0602717399597168, "learning_rate": 9.370319001386963e-06, "loss": 0.1518, "step": 918 }, { "epoch": 0.6366470384482161, "grad_norm": 0.9391573071479797, "learning_rate": 9.369625520110958e-06, "loss": 0.154, "step": 919 }, { "epoch": 0.6373397990994112, "grad_norm": 0.9334474802017212, "learning_rate": 9.368932038834953e-06, "loss": 0.1729, "step": 920 }, { "epoch": 0.6380325597506061, "grad_norm": 1.1120349168777466, "learning_rate": 9.368238557558946e-06, "loss": 0.1823, "step": 921 }, { "epoch": 0.6387253204018012, "grad_norm": 1.159719705581665, "learning_rate": 9.367545076282941e-06, "loss": 0.1734, "step": 922 }, { "epoch": 0.6394180810529961, "grad_norm": 0.9645901322364807, "learning_rate": 9.366851595006936e-06, "loss": 0.1622, "step": 923 }, { "epoch": 0.6401108417041912, "grad_norm": 1.1247612237930298, "learning_rate": 9.36615811373093e-06, "loss": 0.1832, "step": 924 }, { "epoch": 0.6408036023553862, "grad_norm": 1.1481128931045532, "learning_rate": 9.365464632454924e-06, "loss": 0.1846, "step": 925 }, { "epoch": 0.6414963630065812, "grad_norm": 1.0706948041915894, "learning_rate": 9.36477115117892e-06, "loss": 0.1957, "step": 926 }, { "epoch": 0.6421891236577763, "grad_norm": 1.136802315711975, "learning_rate": 9.364077669902913e-06, "loss": 0.1927, "step": 927 }, { "epoch": 0.6428818843089712, "grad_norm": 1.1108746528625488, "learning_rate": 9.363384188626908e-06, "loss": 0.1714, "step": 928 }, { "epoch": 0.6435746449601663, "grad_norm": 1.1542079448699951, "learning_rate": 9.362690707350902e-06, "loss": 0.1624, "step": 929 }, { "epoch": 0.6442674056113613, "grad_norm": 0.9668132066726685, "learning_rate": 9.361997226074897e-06, "loss": 0.1498, "step": 930 }, { "epoch": 0.6449601662625563, "grad_norm": 0.997539222240448, "learning_rate": 9.36130374479889e-06, "loss": 0.1684, "step": 931 }, { "epoch": 0.6456529269137513, "grad_norm": 1.1347752809524536, "learning_rate": 9.360610263522886e-06, "loss": 0.1916, "step": 932 }, { "epoch": 0.6463456875649463, "grad_norm": 1.0294948816299438, "learning_rate": 9.35991678224688e-06, "loss": 0.1711, "step": 933 }, { "epoch": 0.6470384482161413, "grad_norm": 1.0586535930633545, "learning_rate": 9.359223300970874e-06, "loss": 0.1698, "step": 934 }, { "epoch": 0.6477312088673364, "grad_norm": 0.9234961867332458, "learning_rate": 9.358529819694869e-06, "loss": 0.1513, "step": 935 }, { "epoch": 0.6484239695185313, "grad_norm": 1.0497515201568604, "learning_rate": 9.357836338418864e-06, "loss": 0.1767, "step": 936 }, { "epoch": 0.6491167301697264, "grad_norm": 1.0686051845550537, "learning_rate": 9.357142857142859e-06, "loss": 0.1691, "step": 937 }, { "epoch": 0.6498094908209213, "grad_norm": 1.0756900310516357, "learning_rate": 9.356449375866853e-06, "loss": 0.169, "step": 938 }, { "epoch": 0.6505022514721164, "grad_norm": 1.0305697917938232, "learning_rate": 9.355755894590847e-06, "loss": 0.1614, "step": 939 }, { "epoch": 0.6511950121233114, "grad_norm": 1.0718286037445068, "learning_rate": 9.355062413314842e-06, "loss": 0.1552, "step": 940 }, { "epoch": 0.6518877727745064, "grad_norm": 0.9616384506225586, "learning_rate": 9.354368932038835e-06, "loss": 0.1328, "step": 941 }, { "epoch": 0.6525805334257014, "grad_norm": 1.1087232828140259, "learning_rate": 9.35367545076283e-06, "loss": 0.1723, "step": 942 }, { "epoch": 0.6532732940768964, "grad_norm": 1.084382176399231, "learning_rate": 9.352981969486825e-06, "loss": 0.1689, "step": 943 }, { "epoch": 0.6539660547280914, "grad_norm": 1.0610822439193726, "learning_rate": 9.35228848821082e-06, "loss": 0.161, "step": 944 }, { "epoch": 0.6546588153792865, "grad_norm": 1.0572192668914795, "learning_rate": 9.351595006934815e-06, "loss": 0.1412, "step": 945 }, { "epoch": 0.6553515760304814, "grad_norm": 0.9759120941162109, "learning_rate": 9.350901525658808e-06, "loss": 0.146, "step": 946 }, { "epoch": 0.6560443366816765, "grad_norm": 1.085342526435852, "learning_rate": 9.350208044382803e-06, "loss": 0.1497, "step": 947 }, { "epoch": 0.6567370973328714, "grad_norm": 0.9910585284233093, "learning_rate": 9.349514563106798e-06, "loss": 0.1466, "step": 948 }, { "epoch": 0.6574298579840665, "grad_norm": 0.9044740200042725, "learning_rate": 9.348821081830791e-06, "loss": 0.1232, "step": 949 }, { "epoch": 0.6581226186352616, "grad_norm": 1.0807905197143555, "learning_rate": 9.348127600554786e-06, "loss": 0.1817, "step": 950 }, { "epoch": 0.6588153792864565, "grad_norm": 1.0731427669525146, "learning_rate": 9.347434119278779e-06, "loss": 0.1786, "step": 951 }, { "epoch": 0.6595081399376516, "grad_norm": 1.138004183769226, "learning_rate": 9.346740638002774e-06, "loss": 0.1871, "step": 952 }, { "epoch": 0.6602009005888465, "grad_norm": 1.1381019353866577, "learning_rate": 9.346047156726769e-06, "loss": 0.186, "step": 953 }, { "epoch": 0.6608936612400416, "grad_norm": 1.1066994667053223, "learning_rate": 9.345353675450764e-06, "loss": 0.1664, "step": 954 }, { "epoch": 0.6615864218912366, "grad_norm": 1.055285096168518, "learning_rate": 9.344660194174759e-06, "loss": 0.1848, "step": 955 }, { "epoch": 0.6622791825424316, "grad_norm": 1.1309658288955688, "learning_rate": 9.343966712898752e-06, "loss": 0.1718, "step": 956 }, { "epoch": 0.6629719431936266, "grad_norm": 1.0387343168258667, "learning_rate": 9.343273231622747e-06, "loss": 0.1842, "step": 957 }, { "epoch": 0.6636647038448216, "grad_norm": 0.9196197986602783, "learning_rate": 9.342579750346742e-06, "loss": 0.1239, "step": 958 }, { "epoch": 0.6643574644960166, "grad_norm": 1.1281828880310059, "learning_rate": 9.341886269070735e-06, "loss": 0.2056, "step": 959 }, { "epoch": 0.6650502251472117, "grad_norm": 0.9667572975158691, "learning_rate": 9.34119278779473e-06, "loss": 0.1448, "step": 960 }, { "epoch": 0.6657429857984066, "grad_norm": 1.164891004562378, "learning_rate": 9.340499306518725e-06, "loss": 0.1525, "step": 961 }, { "epoch": 0.6664357464496017, "grad_norm": 1.0004878044128418, "learning_rate": 9.33980582524272e-06, "loss": 0.1797, "step": 962 }, { "epoch": 0.6671285071007966, "grad_norm": 1.0409966707229614, "learning_rate": 9.339112343966715e-06, "loss": 0.1636, "step": 963 }, { "epoch": 0.6678212677519917, "grad_norm": 1.0098788738250732, "learning_rate": 9.338418862690708e-06, "loss": 0.1641, "step": 964 }, { "epoch": 0.6685140284031867, "grad_norm": 1.109473466873169, "learning_rate": 9.337725381414703e-06, "loss": 0.1467, "step": 965 }, { "epoch": 0.6692067890543817, "grad_norm": 1.0836076736450195, "learning_rate": 9.337031900138696e-06, "loss": 0.1467, "step": 966 }, { "epoch": 0.6698995497055767, "grad_norm": 1.0216044187545776, "learning_rate": 9.336338418862691e-06, "loss": 0.1667, "step": 967 }, { "epoch": 0.6705923103567717, "grad_norm": 0.8676590919494629, "learning_rate": 9.335644937586686e-06, "loss": 0.1303, "step": 968 }, { "epoch": 0.6712850710079667, "grad_norm": 1.0340557098388672, "learning_rate": 9.33495145631068e-06, "loss": 0.146, "step": 969 }, { "epoch": 0.6719778316591618, "grad_norm": 1.0925043821334839, "learning_rate": 9.334257975034674e-06, "loss": 0.1651, "step": 970 }, { "epoch": 0.6726705923103568, "grad_norm": 1.1762585639953613, "learning_rate": 9.33356449375867e-06, "loss": 0.1832, "step": 971 }, { "epoch": 0.6733633529615518, "grad_norm": 1.0555940866470337, "learning_rate": 9.332871012482664e-06, "loss": 0.175, "step": 972 }, { "epoch": 0.6740561136127468, "grad_norm": 1.0787198543548584, "learning_rate": 9.33217753120666e-06, "loss": 0.1557, "step": 973 }, { "epoch": 0.6747488742639418, "grad_norm": 1.0291804075241089, "learning_rate": 9.331484049930652e-06, "loss": 0.1605, "step": 974 }, { "epoch": 0.6754416349151369, "grad_norm": 1.0406602621078491, "learning_rate": 9.330790568654647e-06, "loss": 0.1642, "step": 975 }, { "epoch": 0.6761343955663318, "grad_norm": 0.9932894110679626, "learning_rate": 9.33009708737864e-06, "loss": 0.1631, "step": 976 }, { "epoch": 0.6768271562175269, "grad_norm": 1.0147134065628052, "learning_rate": 9.329403606102636e-06, "loss": 0.1375, "step": 977 }, { "epoch": 0.6775199168687218, "grad_norm": 1.0169036388397217, "learning_rate": 9.32871012482663e-06, "loss": 0.1525, "step": 978 }, { "epoch": 0.6782126775199169, "grad_norm": 1.1349810361862183, "learning_rate": 9.328016643550625e-06, "loss": 0.1808, "step": 979 }, { "epoch": 0.6789054381711119, "grad_norm": 0.9810749292373657, "learning_rate": 9.32732316227462e-06, "loss": 0.1394, "step": 980 }, { "epoch": 0.6795981988223069, "grad_norm": 1.1747621297836304, "learning_rate": 9.326629680998614e-06, "loss": 0.1777, "step": 981 }, { "epoch": 0.6802909594735019, "grad_norm": 1.1135560274124146, "learning_rate": 9.325936199722609e-06, "loss": 0.1543, "step": 982 }, { "epoch": 0.6809837201246969, "grad_norm": 1.1176321506500244, "learning_rate": 9.325242718446603e-06, "loss": 0.1572, "step": 983 }, { "epoch": 0.6816764807758919, "grad_norm": 1.020430564880371, "learning_rate": 9.324549237170597e-06, "loss": 0.1628, "step": 984 }, { "epoch": 0.682369241427087, "grad_norm": 1.08804190158844, "learning_rate": 9.323855755894592e-06, "loss": 0.157, "step": 985 }, { "epoch": 0.6830620020782819, "grad_norm": 1.0574597120285034, "learning_rate": 9.323162274618585e-06, "loss": 0.1508, "step": 986 }, { "epoch": 0.683754762729477, "grad_norm": 1.0497900247573853, "learning_rate": 9.32246879334258e-06, "loss": 0.1711, "step": 987 }, { "epoch": 0.6844475233806719, "grad_norm": 1.1266224384307861, "learning_rate": 9.321775312066575e-06, "loss": 0.1483, "step": 988 }, { "epoch": 0.685140284031867, "grad_norm": 1.0701349973678589, "learning_rate": 9.32108183079057e-06, "loss": 0.1666, "step": 989 }, { "epoch": 0.685833044683062, "grad_norm": 1.1562618017196655, "learning_rate": 9.320388349514565e-06, "loss": 0.1717, "step": 990 }, { "epoch": 0.686525805334257, "grad_norm": 1.1472821235656738, "learning_rate": 9.319694868238558e-06, "loss": 0.1639, "step": 991 }, { "epoch": 0.6872185659854521, "grad_norm": 1.0069383382797241, "learning_rate": 9.319001386962553e-06, "loss": 0.1675, "step": 992 }, { "epoch": 0.687911326636647, "grad_norm": 1.1724313497543335, "learning_rate": 9.318307905686548e-06, "loss": 0.1765, "step": 993 }, { "epoch": 0.6886040872878421, "grad_norm": 0.966568112373352, "learning_rate": 9.317614424410541e-06, "loss": 0.1397, "step": 994 }, { "epoch": 0.6892968479390371, "grad_norm": 1.0228078365325928, "learning_rate": 9.316920943134536e-06, "loss": 0.148, "step": 995 }, { "epoch": 0.6899896085902321, "grad_norm": 1.229444980621338, "learning_rate": 9.31622746185853e-06, "loss": 0.1855, "step": 996 }, { "epoch": 0.6906823692414271, "grad_norm": 1.1054368019104004, "learning_rate": 9.315533980582526e-06, "loss": 0.2115, "step": 997 }, { "epoch": 0.6913751298926221, "grad_norm": 1.0250862836837769, "learning_rate": 9.31484049930652e-06, "loss": 0.1479, "step": 998 }, { "epoch": 0.6920678905438171, "grad_norm": 1.1154900789260864, "learning_rate": 9.314147018030514e-06, "loss": 0.1697, "step": 999 }, { "epoch": 0.6927606511950122, "grad_norm": 1.1613211631774902, "learning_rate": 9.313453536754509e-06, "loss": 0.1604, "step": 1000 }, { "epoch": 0.6934534118462071, "grad_norm": 1.1166093349456787, "learning_rate": 9.312760055478502e-06, "loss": 0.1304, "step": 1001 }, { "epoch": 0.6941461724974022, "grad_norm": 1.0721238851547241, "learning_rate": 9.312066574202497e-06, "loss": 0.1699, "step": 1002 }, { "epoch": 0.6948389331485971, "grad_norm": 1.1429399251937866, "learning_rate": 9.311373092926492e-06, "loss": 0.1922, "step": 1003 }, { "epoch": 0.6955316937997922, "grad_norm": 0.9935634732246399, "learning_rate": 9.310679611650487e-06, "loss": 0.162, "step": 1004 }, { "epoch": 0.6962244544509871, "grad_norm": 1.2032567262649536, "learning_rate": 9.30998613037448e-06, "loss": 0.1599, "step": 1005 }, { "epoch": 0.6969172151021822, "grad_norm": 1.0629124641418457, "learning_rate": 9.309292649098475e-06, "loss": 0.169, "step": 1006 }, { "epoch": 0.6976099757533772, "grad_norm": 0.9449934363365173, "learning_rate": 9.30859916782247e-06, "loss": 0.1529, "step": 1007 }, { "epoch": 0.6983027364045722, "grad_norm": 1.0493741035461426, "learning_rate": 9.307905686546465e-06, "loss": 0.1621, "step": 1008 }, { "epoch": 0.6989954970557672, "grad_norm": 1.028087854385376, "learning_rate": 9.307212205270458e-06, "loss": 0.1714, "step": 1009 }, { "epoch": 0.6996882577069622, "grad_norm": 1.0470004081726074, "learning_rate": 9.306518723994453e-06, "loss": 0.1527, "step": 1010 }, { "epoch": 0.7003810183581572, "grad_norm": 1.0218693017959595, "learning_rate": 9.305825242718446e-06, "loss": 0.143, "step": 1011 }, { "epoch": 0.7010737790093523, "grad_norm": 1.211381196975708, "learning_rate": 9.305131761442441e-06, "loss": 0.1908, "step": 1012 }, { "epoch": 0.7017665396605473, "grad_norm": 1.0903083086013794, "learning_rate": 9.304438280166436e-06, "loss": 0.1742, "step": 1013 }, { "epoch": 0.7024593003117423, "grad_norm": 1.0702459812164307, "learning_rate": 9.303744798890431e-06, "loss": 0.163, "step": 1014 }, { "epoch": 0.7031520609629373, "grad_norm": 1.0752493143081665, "learning_rate": 9.303051317614426e-06, "loss": 0.1461, "step": 1015 }, { "epoch": 0.7038448216141323, "grad_norm": 1.2007278203964233, "learning_rate": 9.30235783633842e-06, "loss": 0.1593, "step": 1016 }, { "epoch": 0.7045375822653274, "grad_norm": 1.0017162561416626, "learning_rate": 9.301664355062414e-06, "loss": 0.1573, "step": 1017 }, { "epoch": 0.7052303429165223, "grad_norm": 1.1920140981674194, "learning_rate": 9.30097087378641e-06, "loss": 0.1812, "step": 1018 }, { "epoch": 0.7059231035677174, "grad_norm": 1.0638477802276611, "learning_rate": 9.300277392510402e-06, "loss": 0.1658, "step": 1019 }, { "epoch": 0.7066158642189123, "grad_norm": 1.1916426420211792, "learning_rate": 9.299583911234397e-06, "loss": 0.1749, "step": 1020 }, { "epoch": 0.7073086248701074, "grad_norm": 0.9912903904914856, "learning_rate": 9.298890429958392e-06, "loss": 0.1566, "step": 1021 }, { "epoch": 0.7080013855213024, "grad_norm": 0.9812451004981995, "learning_rate": 9.298196948682387e-06, "loss": 0.1295, "step": 1022 }, { "epoch": 0.7086941461724974, "grad_norm": 1.126921534538269, "learning_rate": 9.297503467406382e-06, "loss": 0.1689, "step": 1023 }, { "epoch": 0.7093869068236924, "grad_norm": 1.038448691368103, "learning_rate": 9.296809986130375e-06, "loss": 0.1529, "step": 1024 }, { "epoch": 0.7100796674748874, "grad_norm": 1.2521450519561768, "learning_rate": 9.29611650485437e-06, "loss": 0.1731, "step": 1025 }, { "epoch": 0.7107724281260824, "grad_norm": 0.9496546983718872, "learning_rate": 9.295423023578364e-06, "loss": 0.1349, "step": 1026 }, { "epoch": 0.7114651887772775, "grad_norm": 1.1061389446258545, "learning_rate": 9.294729542302359e-06, "loss": 0.1638, "step": 1027 }, { "epoch": 0.7121579494284724, "grad_norm": 1.0456463098526, "learning_rate": 9.294036061026353e-06, "loss": 0.1557, "step": 1028 }, { "epoch": 0.7128507100796675, "grad_norm": 0.9909372329711914, "learning_rate": 9.293342579750347e-06, "loss": 0.154, "step": 1029 }, { "epoch": 0.7135434707308624, "grad_norm": 1.135169506072998, "learning_rate": 9.292649098474342e-06, "loss": 0.171, "step": 1030 }, { "epoch": 0.7142362313820575, "grad_norm": 1.1106867790222168, "learning_rate": 9.291955617198337e-06, "loss": 0.1695, "step": 1031 }, { "epoch": 0.7149289920332526, "grad_norm": 1.0818274021148682, "learning_rate": 9.291262135922331e-06, "loss": 0.1751, "step": 1032 }, { "epoch": 0.7156217526844475, "grad_norm": 1.0426416397094727, "learning_rate": 9.290568654646326e-06, "loss": 0.155, "step": 1033 }, { "epoch": 0.7163145133356426, "grad_norm": 1.0632688999176025, "learning_rate": 9.28987517337032e-06, "loss": 0.1724, "step": 1034 }, { "epoch": 0.7170072739868375, "grad_norm": 1.0620195865631104, "learning_rate": 9.289181692094315e-06, "loss": 0.1766, "step": 1035 }, { "epoch": 0.7177000346380326, "grad_norm": 1.0451894998550415, "learning_rate": 9.288488210818308e-06, "loss": 0.1543, "step": 1036 }, { "epoch": 0.7183927952892276, "grad_norm": 0.9844353795051575, "learning_rate": 9.287794729542303e-06, "loss": 0.1433, "step": 1037 }, { "epoch": 0.7190855559404226, "grad_norm": 1.168127179145813, "learning_rate": 9.287101248266298e-06, "loss": 0.1524, "step": 1038 }, { "epoch": 0.7197783165916176, "grad_norm": 0.9377881288528442, "learning_rate": 9.286407766990293e-06, "loss": 0.1134, "step": 1039 }, { "epoch": 0.7204710772428126, "grad_norm": 1.0388245582580566, "learning_rate": 9.285714285714288e-06, "loss": 0.1528, "step": 1040 }, { "epoch": 0.7211638378940076, "grad_norm": 1.082821249961853, "learning_rate": 9.28502080443828e-06, "loss": 0.1531, "step": 1041 }, { "epoch": 0.7218565985452027, "grad_norm": 0.903874397277832, "learning_rate": 9.284327323162276e-06, "loss": 0.1338, "step": 1042 }, { "epoch": 0.7225493591963976, "grad_norm": 1.0584840774536133, "learning_rate": 9.28363384188627e-06, "loss": 0.1678, "step": 1043 }, { "epoch": 0.7232421198475927, "grad_norm": 1.0977245569229126, "learning_rate": 9.282940360610264e-06, "loss": 0.1528, "step": 1044 }, { "epoch": 0.7239348804987876, "grad_norm": 1.2373993396759033, "learning_rate": 9.282246879334259e-06, "loss": 0.1749, "step": 1045 }, { "epoch": 0.7246276411499827, "grad_norm": 1.0567039251327515, "learning_rate": 9.281553398058252e-06, "loss": 0.1766, "step": 1046 }, { "epoch": 0.7253204018011777, "grad_norm": 0.9542796015739441, "learning_rate": 9.280859916782247e-06, "loss": 0.1533, "step": 1047 }, { "epoch": 0.7260131624523727, "grad_norm": 1.0102726221084595, "learning_rate": 9.280166435506242e-06, "loss": 0.1343, "step": 1048 }, { "epoch": 0.7267059231035677, "grad_norm": 1.1852046251296997, "learning_rate": 9.279472954230237e-06, "loss": 0.1793, "step": 1049 }, { "epoch": 0.7273986837547627, "grad_norm": 1.1553641557693481, "learning_rate": 9.278779472954232e-06, "loss": 0.1757, "step": 1050 }, { "epoch": 0.7280914444059577, "grad_norm": 1.290137529373169, "learning_rate": 9.278085991678225e-06, "loss": 0.1827, "step": 1051 }, { "epoch": 0.7287842050571528, "grad_norm": 1.0179033279418945, "learning_rate": 9.27739251040222e-06, "loss": 0.1448, "step": 1052 }, { "epoch": 0.7294769657083477, "grad_norm": 1.0473142862319946, "learning_rate": 9.276699029126215e-06, "loss": 0.1529, "step": 1053 }, { "epoch": 0.7301697263595428, "grad_norm": 1.0068601369857788, "learning_rate": 9.276005547850208e-06, "loss": 0.1602, "step": 1054 }, { "epoch": 0.7308624870107377, "grad_norm": 1.0162426233291626, "learning_rate": 9.275312066574203e-06, "loss": 0.1511, "step": 1055 }, { "epoch": 0.7315552476619328, "grad_norm": 1.029964566230774, "learning_rate": 9.274618585298198e-06, "loss": 0.1479, "step": 1056 }, { "epoch": 0.7322480083131279, "grad_norm": 1.010059118270874, "learning_rate": 9.273925104022193e-06, "loss": 0.1388, "step": 1057 }, { "epoch": 0.7329407689643228, "grad_norm": 0.9803706407546997, "learning_rate": 9.273231622746188e-06, "loss": 0.1521, "step": 1058 }, { "epoch": 0.7336335296155179, "grad_norm": 1.1875274181365967, "learning_rate": 9.272538141470181e-06, "loss": 0.1636, "step": 1059 }, { "epoch": 0.7343262902667128, "grad_norm": 1.0571045875549316, "learning_rate": 9.271844660194176e-06, "loss": 0.1642, "step": 1060 }, { "epoch": 0.7350190509179079, "grad_norm": 1.180854320526123, "learning_rate": 9.27115117891817e-06, "loss": 0.2017, "step": 1061 }, { "epoch": 0.7357118115691029, "grad_norm": 1.120179533958435, "learning_rate": 9.270457697642164e-06, "loss": 0.1575, "step": 1062 }, { "epoch": 0.7364045722202979, "grad_norm": 0.9948523044586182, "learning_rate": 9.26976421636616e-06, "loss": 0.1329, "step": 1063 }, { "epoch": 0.7370973328714929, "grad_norm": 1.1517481803894043, "learning_rate": 9.269070735090152e-06, "loss": 0.1882, "step": 1064 }, { "epoch": 0.7377900935226879, "grad_norm": 1.1129299402236938, "learning_rate": 9.268377253814147e-06, "loss": 0.1755, "step": 1065 }, { "epoch": 0.7384828541738829, "grad_norm": 1.118369698524475, "learning_rate": 9.267683772538142e-06, "loss": 0.1657, "step": 1066 }, { "epoch": 0.739175614825078, "grad_norm": 1.0388578176498413, "learning_rate": 9.266990291262137e-06, "loss": 0.1499, "step": 1067 }, { "epoch": 0.7398683754762729, "grad_norm": 1.312852144241333, "learning_rate": 9.266296809986132e-06, "loss": 0.1345, "step": 1068 }, { "epoch": 0.740561136127468, "grad_norm": 1.011390209197998, "learning_rate": 9.265603328710125e-06, "loss": 0.1409, "step": 1069 }, { "epoch": 0.7412538967786629, "grad_norm": 0.9556867480278015, "learning_rate": 9.26490984743412e-06, "loss": 0.1349, "step": 1070 }, { "epoch": 0.741946657429858, "grad_norm": 1.0241023302078247, "learning_rate": 9.264216366158114e-06, "loss": 0.1419, "step": 1071 }, { "epoch": 0.742639418081053, "grad_norm": 1.0253700017929077, "learning_rate": 9.263522884882108e-06, "loss": 0.1783, "step": 1072 }, { "epoch": 0.743332178732248, "grad_norm": 1.0557609796524048, "learning_rate": 9.262829403606103e-06, "loss": 0.1725, "step": 1073 }, { "epoch": 0.744024939383443, "grad_norm": 1.0009117126464844, "learning_rate": 9.262135922330098e-06, "loss": 0.1578, "step": 1074 }, { "epoch": 0.744717700034638, "grad_norm": 0.9810420274734497, "learning_rate": 9.261442441054093e-06, "loss": 0.1463, "step": 1075 }, { "epoch": 0.745410460685833, "grad_norm": 1.3629769086837769, "learning_rate": 9.260748959778087e-06, "loss": 0.163, "step": 1076 }, { "epoch": 0.7461032213370281, "grad_norm": 1.088313341140747, "learning_rate": 9.260055478502081e-06, "loss": 0.1408, "step": 1077 }, { "epoch": 0.7467959819882231, "grad_norm": 0.9942247867584229, "learning_rate": 9.259361997226076e-06, "loss": 0.1337, "step": 1078 }, { "epoch": 0.7474887426394181, "grad_norm": 1.0539976358413696, "learning_rate": 9.25866851595007e-06, "loss": 0.1659, "step": 1079 }, { "epoch": 0.7481815032906131, "grad_norm": 1.0356535911560059, "learning_rate": 9.257975034674065e-06, "loss": 0.1572, "step": 1080 }, { "epoch": 0.7488742639418081, "grad_norm": 1.2949830293655396, "learning_rate": 9.25728155339806e-06, "loss": 0.2064, "step": 1081 }, { "epoch": 0.7495670245930032, "grad_norm": 0.9998825192451477, "learning_rate": 9.256588072122053e-06, "loss": 0.1268, "step": 1082 }, { "epoch": 0.7502597852441981, "grad_norm": 1.1159971952438354, "learning_rate": 9.255894590846048e-06, "loss": 0.1631, "step": 1083 }, { "epoch": 0.7509525458953932, "grad_norm": 0.9983909726142883, "learning_rate": 9.255201109570043e-06, "loss": 0.1426, "step": 1084 }, { "epoch": 0.7516453065465881, "grad_norm": 1.0740413665771484, "learning_rate": 9.254507628294038e-06, "loss": 0.1622, "step": 1085 }, { "epoch": 0.7523380671977832, "grad_norm": 1.0700693130493164, "learning_rate": 9.25381414701803e-06, "loss": 0.1583, "step": 1086 }, { "epoch": 0.7530308278489781, "grad_norm": 1.0357578992843628, "learning_rate": 9.253120665742026e-06, "loss": 0.1579, "step": 1087 }, { "epoch": 0.7537235885001732, "grad_norm": 1.112646222114563, "learning_rate": 9.25242718446602e-06, "loss": 0.1759, "step": 1088 }, { "epoch": 0.7544163491513682, "grad_norm": 0.9094178676605225, "learning_rate": 9.251733703190014e-06, "loss": 0.1235, "step": 1089 }, { "epoch": 0.7551091098025632, "grad_norm": 1.0854482650756836, "learning_rate": 9.251040221914009e-06, "loss": 0.1562, "step": 1090 }, { "epoch": 0.7558018704537582, "grad_norm": 1.2528469562530518, "learning_rate": 9.250346740638004e-06, "loss": 0.2051, "step": 1091 }, { "epoch": 0.7564946311049532, "grad_norm": 0.8920885324478149, "learning_rate": 9.249653259361999e-06, "loss": 0.1357, "step": 1092 }, { "epoch": 0.7571873917561482, "grad_norm": 0.9023600816726685, "learning_rate": 9.248959778085992e-06, "loss": 0.1237, "step": 1093 }, { "epoch": 0.7578801524073433, "grad_norm": 1.0625343322753906, "learning_rate": 9.248266296809987e-06, "loss": 0.1655, "step": 1094 }, { "epoch": 0.7585729130585382, "grad_norm": 1.1089736223220825, "learning_rate": 9.247572815533982e-06, "loss": 0.1783, "step": 1095 }, { "epoch": 0.7592656737097333, "grad_norm": 1.058384656906128, "learning_rate": 9.246879334257975e-06, "loss": 0.1581, "step": 1096 }, { "epoch": 0.7599584343609282, "grad_norm": 1.105261206626892, "learning_rate": 9.24618585298197e-06, "loss": 0.1815, "step": 1097 }, { "epoch": 0.7606511950121233, "grad_norm": 0.9062468409538269, "learning_rate": 9.245492371705965e-06, "loss": 0.1406, "step": 1098 }, { "epoch": 0.7613439556633184, "grad_norm": 0.981909990310669, "learning_rate": 9.24479889042996e-06, "loss": 0.1778, "step": 1099 }, { "epoch": 0.7620367163145133, "grad_norm": 1.1163431406021118, "learning_rate": 9.244105409153955e-06, "loss": 0.1896, "step": 1100 }, { "epoch": 0.7627294769657084, "grad_norm": 1.1027591228485107, "learning_rate": 9.243411927877948e-06, "loss": 0.1287, "step": 1101 }, { "epoch": 0.7634222376169033, "grad_norm": 1.017777442932129, "learning_rate": 9.242718446601943e-06, "loss": 0.1676, "step": 1102 }, { "epoch": 0.7641149982680984, "grad_norm": 1.1382927894592285, "learning_rate": 9.242024965325936e-06, "loss": 0.1813, "step": 1103 }, { "epoch": 0.7648077589192934, "grad_norm": 1.0597646236419678, "learning_rate": 9.241331484049931e-06, "loss": 0.1337, "step": 1104 }, { "epoch": 0.7655005195704884, "grad_norm": 0.8857343196868896, "learning_rate": 9.240638002773926e-06, "loss": 0.1308, "step": 1105 }, { "epoch": 0.7661932802216834, "grad_norm": 1.1816799640655518, "learning_rate": 9.23994452149792e-06, "loss": 0.2012, "step": 1106 }, { "epoch": 0.7668860408728784, "grad_norm": 1.0828983783721924, "learning_rate": 9.239251040221914e-06, "loss": 0.1488, "step": 1107 }, { "epoch": 0.7675788015240734, "grad_norm": 0.993518590927124, "learning_rate": 9.23855755894591e-06, "loss": 0.119, "step": 1108 }, { "epoch": 0.7682715621752685, "grad_norm": 1.0390045642852783, "learning_rate": 9.237864077669904e-06, "loss": 0.1567, "step": 1109 }, { "epoch": 0.7689643228264634, "grad_norm": 1.031101107597351, "learning_rate": 9.237170596393899e-06, "loss": 0.1593, "step": 1110 }, { "epoch": 0.7696570834776585, "grad_norm": 1.0703082084655762, "learning_rate": 9.236477115117892e-06, "loss": 0.16, "step": 1111 }, { "epoch": 0.7703498441288534, "grad_norm": 0.9922031760215759, "learning_rate": 9.235783633841887e-06, "loss": 0.1457, "step": 1112 }, { "epoch": 0.7710426047800485, "grad_norm": 1.1097067594528198, "learning_rate": 9.23509015256588e-06, "loss": 0.1583, "step": 1113 }, { "epoch": 0.7717353654312435, "grad_norm": 1.2743453979492188, "learning_rate": 9.234396671289875e-06, "loss": 0.1684, "step": 1114 }, { "epoch": 0.7724281260824385, "grad_norm": 0.9603894948959351, "learning_rate": 9.23370319001387e-06, "loss": 0.1342, "step": 1115 }, { "epoch": 0.7731208867336335, "grad_norm": 1.0211710929870605, "learning_rate": 9.233009708737865e-06, "loss": 0.148, "step": 1116 }, { "epoch": 0.7738136473848285, "grad_norm": 1.2108137607574463, "learning_rate": 9.23231622746186e-06, "loss": 0.1791, "step": 1117 }, { "epoch": 0.7745064080360236, "grad_norm": 1.0738354921340942, "learning_rate": 9.231622746185853e-06, "loss": 0.1775, "step": 1118 }, { "epoch": 0.7751991686872186, "grad_norm": 0.9560322165489197, "learning_rate": 9.230929264909848e-06, "loss": 0.1362, "step": 1119 }, { "epoch": 0.7758919293384136, "grad_norm": 0.9589406847953796, "learning_rate": 9.230235783633843e-06, "loss": 0.1538, "step": 1120 }, { "epoch": 0.7765846899896086, "grad_norm": 0.9562289714813232, "learning_rate": 9.229542302357837e-06, "loss": 0.1369, "step": 1121 }, { "epoch": 0.7772774506408036, "grad_norm": 1.0238457918167114, "learning_rate": 9.228848821081831e-06, "loss": 0.1484, "step": 1122 }, { "epoch": 0.7779702112919986, "grad_norm": 1.0500246286392212, "learning_rate": 9.228155339805825e-06, "loss": 0.1431, "step": 1123 }, { "epoch": 0.7786629719431937, "grad_norm": 0.9847618937492371, "learning_rate": 9.22746185852982e-06, "loss": 0.1379, "step": 1124 }, { "epoch": 0.7793557325943886, "grad_norm": 1.0581865310668945, "learning_rate": 9.226768377253815e-06, "loss": 0.1758, "step": 1125 }, { "epoch": 0.7800484932455837, "grad_norm": 1.0479624271392822, "learning_rate": 9.22607489597781e-06, "loss": 0.1748, "step": 1126 }, { "epoch": 0.7807412538967786, "grad_norm": 1.0092641115188599, "learning_rate": 9.225381414701804e-06, "loss": 0.1671, "step": 1127 }, { "epoch": 0.7814340145479737, "grad_norm": 0.9954952597618103, "learning_rate": 9.224687933425798e-06, "loss": 0.1442, "step": 1128 }, { "epoch": 0.7821267751991687, "grad_norm": 0.9774269461631775, "learning_rate": 9.223994452149793e-06, "loss": 0.1558, "step": 1129 }, { "epoch": 0.7828195358503637, "grad_norm": 1.1028008460998535, "learning_rate": 9.223300970873788e-06, "loss": 0.162, "step": 1130 }, { "epoch": 0.7835122965015587, "grad_norm": 1.1184663772583008, "learning_rate": 9.22260748959778e-06, "loss": 0.1866, "step": 1131 }, { "epoch": 0.7842050571527537, "grad_norm": 0.9057531356811523, "learning_rate": 9.221914008321776e-06, "loss": 0.1432, "step": 1132 }, { "epoch": 0.7848978178039487, "grad_norm": 1.1297880411148071, "learning_rate": 9.22122052704577e-06, "loss": 0.1589, "step": 1133 }, { "epoch": 0.7855905784551438, "grad_norm": 1.0383490324020386, "learning_rate": 9.220527045769766e-06, "loss": 0.1496, "step": 1134 }, { "epoch": 0.7862833391063387, "grad_norm": 0.9274728298187256, "learning_rate": 9.21983356449376e-06, "loss": 0.1353, "step": 1135 }, { "epoch": 0.7869760997575338, "grad_norm": 1.059129238128662, "learning_rate": 9.219140083217754e-06, "loss": 0.1473, "step": 1136 }, { "epoch": 0.7876688604087287, "grad_norm": 0.9455123543739319, "learning_rate": 9.218446601941749e-06, "loss": 0.1427, "step": 1137 }, { "epoch": 0.7883616210599238, "grad_norm": 0.9490697383880615, "learning_rate": 9.217753120665742e-06, "loss": 0.1236, "step": 1138 }, { "epoch": 0.7890543817111189, "grad_norm": 1.120455265045166, "learning_rate": 9.217059639389737e-06, "loss": 0.1466, "step": 1139 }, { "epoch": 0.7897471423623138, "grad_norm": 1.126746654510498, "learning_rate": 9.216366158113732e-06, "loss": 0.1668, "step": 1140 }, { "epoch": 0.7904399030135089, "grad_norm": 1.1976633071899414, "learning_rate": 9.215672676837725e-06, "loss": 0.1642, "step": 1141 }, { "epoch": 0.7911326636647038, "grad_norm": 1.0458179712295532, "learning_rate": 9.21497919556172e-06, "loss": 0.1449, "step": 1142 }, { "epoch": 0.7918254243158989, "grad_norm": 0.9306297302246094, "learning_rate": 9.214285714285715e-06, "loss": 0.1553, "step": 1143 }, { "epoch": 0.7925181849670939, "grad_norm": 1.0312331914901733, "learning_rate": 9.21359223300971e-06, "loss": 0.1533, "step": 1144 }, { "epoch": 0.7932109456182889, "grad_norm": 1.0560755729675293, "learning_rate": 9.212898751733705e-06, "loss": 0.1542, "step": 1145 }, { "epoch": 0.7939037062694839, "grad_norm": 1.2565730810165405, "learning_rate": 9.212205270457698e-06, "loss": 0.2271, "step": 1146 }, { "epoch": 0.7945964669206789, "grad_norm": 1.0296990871429443, "learning_rate": 9.211511789181693e-06, "loss": 0.1656, "step": 1147 }, { "epoch": 0.7952892275718739, "grad_norm": 1.0432108640670776, "learning_rate": 9.210818307905686e-06, "loss": 0.1572, "step": 1148 }, { "epoch": 0.795981988223069, "grad_norm": 1.1227762699127197, "learning_rate": 9.210124826629681e-06, "loss": 0.1803, "step": 1149 }, { "epoch": 0.7966747488742639, "grad_norm": 1.0830938816070557, "learning_rate": 9.209431345353676e-06, "loss": 0.1744, "step": 1150 }, { "epoch": 0.797367509525459, "grad_norm": 1.0725040435791016, "learning_rate": 9.208737864077671e-06, "loss": 0.1465, "step": 1151 }, { "epoch": 0.7980602701766539, "grad_norm": 1.0585803985595703, "learning_rate": 9.208044382801666e-06, "loss": 0.166, "step": 1152 }, { "epoch": 0.798753030827849, "grad_norm": 1.043522834777832, "learning_rate": 9.207350901525659e-06, "loss": 0.1609, "step": 1153 }, { "epoch": 0.799445791479044, "grad_norm": 1.0265485048294067, "learning_rate": 9.206657420249654e-06, "loss": 0.1596, "step": 1154 }, { "epoch": 0.800138552130239, "grad_norm": 1.074607491493225, "learning_rate": 9.205963938973649e-06, "loss": 0.1983, "step": 1155 }, { "epoch": 0.800831312781434, "grad_norm": 1.0008004903793335, "learning_rate": 9.205270457697642e-06, "loss": 0.1739, "step": 1156 }, { "epoch": 0.801524073432629, "grad_norm": 1.0192253589630127, "learning_rate": 9.204576976421637e-06, "loss": 0.1589, "step": 1157 }, { "epoch": 0.802216834083824, "grad_norm": 1.1635502576828003, "learning_rate": 9.203883495145632e-06, "loss": 0.1731, "step": 1158 }, { "epoch": 0.8029095947350191, "grad_norm": 1.1289515495300293, "learning_rate": 9.203190013869625e-06, "loss": 0.161, "step": 1159 }, { "epoch": 0.803602355386214, "grad_norm": 0.9179483652114868, "learning_rate": 9.20249653259362e-06, "loss": 0.144, "step": 1160 }, { "epoch": 0.8042951160374091, "grad_norm": 1.172469139099121, "learning_rate": 9.201803051317615e-06, "loss": 0.1731, "step": 1161 }, { "epoch": 0.804987876688604, "grad_norm": 1.0264207124710083, "learning_rate": 9.20110957004161e-06, "loss": 0.154, "step": 1162 }, { "epoch": 0.8056806373397991, "grad_norm": 1.1471171379089355, "learning_rate": 9.200416088765603e-06, "loss": 0.1777, "step": 1163 }, { "epoch": 0.8063733979909941, "grad_norm": 1.0495991706848145, "learning_rate": 9.199722607489598e-06, "loss": 0.1834, "step": 1164 }, { "epoch": 0.8070661586421891, "grad_norm": 1.0372450351715088, "learning_rate": 9.199029126213593e-06, "loss": 0.1576, "step": 1165 }, { "epoch": 0.8077589192933842, "grad_norm": 1.057462453842163, "learning_rate": 9.198335644937586e-06, "loss": 0.1754, "step": 1166 }, { "epoch": 0.8084516799445791, "grad_norm": 1.0398510694503784, "learning_rate": 9.197642163661581e-06, "loss": 0.1693, "step": 1167 }, { "epoch": 0.8091444405957742, "grad_norm": 0.997722864151001, "learning_rate": 9.196948682385576e-06, "loss": 0.1603, "step": 1168 }, { "epoch": 0.8098372012469691, "grad_norm": 1.0216574668884277, "learning_rate": 9.196255201109571e-06, "loss": 0.1597, "step": 1169 }, { "epoch": 0.8105299618981642, "grad_norm": 0.9988551139831543, "learning_rate": 9.195561719833566e-06, "loss": 0.1481, "step": 1170 }, { "epoch": 0.8112227225493592, "grad_norm": 1.1010034084320068, "learning_rate": 9.19486823855756e-06, "loss": 0.1441, "step": 1171 }, { "epoch": 0.8119154832005542, "grad_norm": 1.089496374130249, "learning_rate": 9.194174757281554e-06, "loss": 0.1717, "step": 1172 }, { "epoch": 0.8126082438517492, "grad_norm": 1.0673224925994873, "learning_rate": 9.193481276005548e-06, "loss": 0.1638, "step": 1173 }, { "epoch": 0.8133010045029442, "grad_norm": 0.9966147541999817, "learning_rate": 9.192787794729543e-06, "loss": 0.1386, "step": 1174 }, { "epoch": 0.8139937651541392, "grad_norm": 1.1094818115234375, "learning_rate": 9.192094313453538e-06, "loss": 0.1182, "step": 1175 }, { "epoch": 0.8146865258053343, "grad_norm": 1.0231292247772217, "learning_rate": 9.191400832177532e-06, "loss": 0.1565, "step": 1176 }, { "epoch": 0.8153792864565292, "grad_norm": 1.0623294115066528, "learning_rate": 9.190707350901527e-06, "loss": 0.165, "step": 1177 }, { "epoch": 0.8160720471077243, "grad_norm": 1.0557681322097778, "learning_rate": 9.19001386962552e-06, "loss": 0.1681, "step": 1178 }, { "epoch": 0.8167648077589192, "grad_norm": 1.084099292755127, "learning_rate": 9.189320388349516e-06, "loss": 0.1722, "step": 1179 }, { "epoch": 0.8174575684101143, "grad_norm": 1.0746034383773804, "learning_rate": 9.18862690707351e-06, "loss": 0.1596, "step": 1180 }, { "epoch": 0.8181503290613094, "grad_norm": 1.026454210281372, "learning_rate": 9.187933425797504e-06, "loss": 0.1605, "step": 1181 }, { "epoch": 0.8188430897125043, "grad_norm": 0.9294998645782471, "learning_rate": 9.187239944521499e-06, "loss": 0.1333, "step": 1182 }, { "epoch": 0.8195358503636994, "grad_norm": 1.0550793409347534, "learning_rate": 9.186546463245492e-06, "loss": 0.1332, "step": 1183 }, { "epoch": 0.8202286110148943, "grad_norm": 0.9574646353721619, "learning_rate": 9.185852981969487e-06, "loss": 0.143, "step": 1184 }, { "epoch": 0.8209213716660894, "grad_norm": 1.0283784866333008, "learning_rate": 9.185159500693482e-06, "loss": 0.1682, "step": 1185 }, { "epoch": 0.8216141323172844, "grad_norm": 1.0605374574661255, "learning_rate": 9.184466019417477e-06, "loss": 0.1476, "step": 1186 }, { "epoch": 0.8223068929684794, "grad_norm": 1.0304421186447144, "learning_rate": 9.183772538141472e-06, "loss": 0.1095, "step": 1187 }, { "epoch": 0.8229996536196744, "grad_norm": 1.1015008687973022, "learning_rate": 9.183079056865465e-06, "loss": 0.1349, "step": 1188 }, { "epoch": 0.8236924142708694, "grad_norm": 1.1376676559448242, "learning_rate": 9.18238557558946e-06, "loss": 0.1681, "step": 1189 }, { "epoch": 0.8243851749220644, "grad_norm": 1.0262609720230103, "learning_rate": 9.181692094313455e-06, "loss": 0.1517, "step": 1190 }, { "epoch": 0.8250779355732595, "grad_norm": 0.9977340698242188, "learning_rate": 9.180998613037448e-06, "loss": 0.152, "step": 1191 }, { "epoch": 0.8257706962244544, "grad_norm": 0.984466016292572, "learning_rate": 9.180305131761443e-06, "loss": 0.1403, "step": 1192 }, { "epoch": 0.8264634568756495, "grad_norm": 1.0744271278381348, "learning_rate": 9.179611650485438e-06, "loss": 0.134, "step": 1193 }, { "epoch": 0.8271562175268444, "grad_norm": 1.0406293869018555, "learning_rate": 9.178918169209433e-06, "loss": 0.1605, "step": 1194 }, { "epoch": 0.8278489781780395, "grad_norm": 1.2142112255096436, "learning_rate": 9.178224687933428e-06, "loss": 0.1864, "step": 1195 }, { "epoch": 0.8285417388292345, "grad_norm": 0.9912194609642029, "learning_rate": 9.177531206657421e-06, "loss": 0.1439, "step": 1196 }, { "epoch": 0.8292344994804295, "grad_norm": 1.0492942333221436, "learning_rate": 9.176837725381416e-06, "loss": 0.1695, "step": 1197 }, { "epoch": 0.8299272601316245, "grad_norm": 0.9441617131233215, "learning_rate": 9.176144244105409e-06, "loss": 0.1256, "step": 1198 }, { "epoch": 0.8306200207828195, "grad_norm": 1.0140005350112915, "learning_rate": 9.175450762829404e-06, "loss": 0.1355, "step": 1199 }, { "epoch": 0.8313127814340145, "grad_norm": 1.0498930215835571, "learning_rate": 9.174757281553399e-06, "loss": 0.1533, "step": 1200 }, { "epoch": 0.8320055420852096, "grad_norm": 1.0157033205032349, "learning_rate": 9.174063800277392e-06, "loss": 0.1648, "step": 1201 }, { "epoch": 0.8326983027364045, "grad_norm": 0.989496111869812, "learning_rate": 9.173370319001387e-06, "loss": 0.1488, "step": 1202 }, { "epoch": 0.8333910633875996, "grad_norm": 0.9286702871322632, "learning_rate": 9.172676837725382e-06, "loss": 0.1183, "step": 1203 }, { "epoch": 0.8340838240387946, "grad_norm": 1.0913113355636597, "learning_rate": 9.171983356449377e-06, "loss": 0.1497, "step": 1204 }, { "epoch": 0.8347765846899896, "grad_norm": 1.0152077674865723, "learning_rate": 9.171289875173372e-06, "loss": 0.1533, "step": 1205 }, { "epoch": 0.8354693453411847, "grad_norm": 0.9213568568229675, "learning_rate": 9.170596393897365e-06, "loss": 0.1348, "step": 1206 }, { "epoch": 0.8361621059923796, "grad_norm": 0.9164698719978333, "learning_rate": 9.16990291262136e-06, "loss": 0.1322, "step": 1207 }, { "epoch": 0.8368548666435747, "grad_norm": 1.0275636911392212, "learning_rate": 9.169209431345353e-06, "loss": 0.1684, "step": 1208 }, { "epoch": 0.8375476272947696, "grad_norm": 1.00309157371521, "learning_rate": 9.168515950069348e-06, "loss": 0.1364, "step": 1209 }, { "epoch": 0.8382403879459647, "grad_norm": 1.0757734775543213, "learning_rate": 9.167822468793343e-06, "loss": 0.128, "step": 1210 }, { "epoch": 0.8389331485971597, "grad_norm": 0.9592078924179077, "learning_rate": 9.167128987517338e-06, "loss": 0.1383, "step": 1211 }, { "epoch": 0.8396259092483547, "grad_norm": 0.9673621654510498, "learning_rate": 9.166435506241333e-06, "loss": 0.1313, "step": 1212 }, { "epoch": 0.8403186698995497, "grad_norm": 1.0815045833587646, "learning_rate": 9.165742024965326e-06, "loss": 0.184, "step": 1213 }, { "epoch": 0.8410114305507447, "grad_norm": 1.0106115341186523, "learning_rate": 9.165048543689321e-06, "loss": 0.1692, "step": 1214 }, { "epoch": 0.8417041912019397, "grad_norm": 1.0461982488632202, "learning_rate": 9.164355062413316e-06, "loss": 0.142, "step": 1215 }, { "epoch": 0.8423969518531348, "grad_norm": 1.1969220638275146, "learning_rate": 9.16366158113731e-06, "loss": 0.1573, "step": 1216 }, { "epoch": 0.8430897125043297, "grad_norm": 1.0679327249526978, "learning_rate": 9.162968099861304e-06, "loss": 0.1659, "step": 1217 }, { "epoch": 0.8437824731555248, "grad_norm": 1.1530208587646484, "learning_rate": 9.162274618585298e-06, "loss": 0.1521, "step": 1218 }, { "epoch": 0.8444752338067197, "grad_norm": 1.0931788682937622, "learning_rate": 9.161581137309293e-06, "loss": 0.1483, "step": 1219 }, { "epoch": 0.8451679944579148, "grad_norm": 0.9220978617668152, "learning_rate": 9.160887656033287e-06, "loss": 0.14, "step": 1220 }, { "epoch": 0.8458607551091099, "grad_norm": 1.0384410619735718, "learning_rate": 9.160194174757282e-06, "loss": 0.1656, "step": 1221 }, { "epoch": 0.8465535157603048, "grad_norm": 1.0730808973312378, "learning_rate": 9.159500693481277e-06, "loss": 0.1542, "step": 1222 }, { "epoch": 0.8472462764114999, "grad_norm": 0.91897052526474, "learning_rate": 9.15880721220527e-06, "loss": 0.1471, "step": 1223 }, { "epoch": 0.8479390370626948, "grad_norm": 1.0821276903152466, "learning_rate": 9.158113730929266e-06, "loss": 0.1534, "step": 1224 }, { "epoch": 0.8486317977138899, "grad_norm": 1.1234447956085205, "learning_rate": 9.15742024965326e-06, "loss": 0.1592, "step": 1225 }, { "epoch": 0.8493245583650849, "grad_norm": 0.9577057361602783, "learning_rate": 9.156726768377254e-06, "loss": 0.1246, "step": 1226 }, { "epoch": 0.8500173190162799, "grad_norm": 1.0541062355041504, "learning_rate": 9.156033287101249e-06, "loss": 0.1688, "step": 1227 }, { "epoch": 0.8507100796674749, "grad_norm": 1.027347445487976, "learning_rate": 9.155339805825244e-06, "loss": 0.1746, "step": 1228 }, { "epoch": 0.8514028403186699, "grad_norm": 0.9498367309570312, "learning_rate": 9.154646324549238e-06, "loss": 0.1384, "step": 1229 }, { "epoch": 0.8520956009698649, "grad_norm": 1.069927453994751, "learning_rate": 9.153952843273233e-06, "loss": 0.1328, "step": 1230 }, { "epoch": 0.85278836162106, "grad_norm": 0.9595523476600647, "learning_rate": 9.153259361997227e-06, "loss": 0.1171, "step": 1231 }, { "epoch": 0.8534811222722549, "grad_norm": 1.2975196838378906, "learning_rate": 9.152565880721222e-06, "loss": 0.1949, "step": 1232 }, { "epoch": 0.85417388292345, "grad_norm": 1.0584015846252441, "learning_rate": 9.151872399445215e-06, "loss": 0.1662, "step": 1233 }, { "epoch": 0.8548666435746449, "grad_norm": 0.9537229537963867, "learning_rate": 9.15117891816921e-06, "loss": 0.1583, "step": 1234 }, { "epoch": 0.85555940422584, "grad_norm": 1.0296034812927246, "learning_rate": 9.150485436893205e-06, "loss": 0.1647, "step": 1235 }, { "epoch": 0.856252164877035, "grad_norm": 1.0385677814483643, "learning_rate": 9.149791955617198e-06, "loss": 0.144, "step": 1236 }, { "epoch": 0.85694492552823, "grad_norm": 1.0480293035507202, "learning_rate": 9.149098474341193e-06, "loss": 0.1494, "step": 1237 }, { "epoch": 0.857637686179425, "grad_norm": 1.0525736808776855, "learning_rate": 9.148404993065188e-06, "loss": 0.1506, "step": 1238 }, { "epoch": 0.85833044683062, "grad_norm": 1.0605429410934448, "learning_rate": 9.147711511789183e-06, "loss": 0.1378, "step": 1239 }, { "epoch": 0.859023207481815, "grad_norm": 1.1563390493392944, "learning_rate": 9.147018030513178e-06, "loss": 0.1696, "step": 1240 }, { "epoch": 0.8597159681330101, "grad_norm": 1.1349605321884155, "learning_rate": 9.146324549237171e-06, "loss": 0.1645, "step": 1241 }, { "epoch": 0.860408728784205, "grad_norm": 0.9695345759391785, "learning_rate": 9.145631067961166e-06, "loss": 0.1487, "step": 1242 }, { "epoch": 0.8611014894354001, "grad_norm": 0.9568183422088623, "learning_rate": 9.144937586685159e-06, "loss": 0.1329, "step": 1243 }, { "epoch": 0.861794250086595, "grad_norm": 1.025154709815979, "learning_rate": 9.144244105409154e-06, "loss": 0.1299, "step": 1244 }, { "epoch": 0.8624870107377901, "grad_norm": 1.0970348119735718, "learning_rate": 9.143550624133149e-06, "loss": 0.1621, "step": 1245 }, { "epoch": 0.863179771388985, "grad_norm": 1.0218442678451538, "learning_rate": 9.142857142857144e-06, "loss": 0.1256, "step": 1246 }, { "epoch": 0.8638725320401801, "grad_norm": 1.2018505334854126, "learning_rate": 9.142163661581139e-06, "loss": 0.1697, "step": 1247 }, { "epoch": 0.8645652926913752, "grad_norm": 1.018684983253479, "learning_rate": 9.141470180305132e-06, "loss": 0.1705, "step": 1248 }, { "epoch": 0.8652580533425701, "grad_norm": 1.0717487335205078, "learning_rate": 9.140776699029127e-06, "loss": 0.1784, "step": 1249 }, { "epoch": 0.8659508139937652, "grad_norm": 0.947426974773407, "learning_rate": 9.140083217753122e-06, "loss": 0.121, "step": 1250 }, { "epoch": 0.8666435746449601, "grad_norm": 1.0973340272903442, "learning_rate": 9.139389736477115e-06, "loss": 0.1572, "step": 1251 }, { "epoch": 0.8673363352961552, "grad_norm": 1.0337198972702026, "learning_rate": 9.13869625520111e-06, "loss": 0.1555, "step": 1252 }, { "epoch": 0.8680290959473502, "grad_norm": 0.9271091818809509, "learning_rate": 9.138002773925105e-06, "loss": 0.1254, "step": 1253 }, { "epoch": 0.8687218565985452, "grad_norm": 1.0046056509017944, "learning_rate": 9.1373092926491e-06, "loss": 0.1498, "step": 1254 }, { "epoch": 0.8694146172497402, "grad_norm": 1.0540696382522583, "learning_rate": 9.136615811373093e-06, "loss": 0.1684, "step": 1255 }, { "epoch": 0.8701073779009352, "grad_norm": 1.0148552656173706, "learning_rate": 9.135922330097088e-06, "loss": 0.1676, "step": 1256 }, { "epoch": 0.8708001385521302, "grad_norm": 0.9528785347938538, "learning_rate": 9.135228848821083e-06, "loss": 0.1379, "step": 1257 }, { "epoch": 0.8714928992033253, "grad_norm": 1.084505319595337, "learning_rate": 9.134535367545076e-06, "loss": 0.1533, "step": 1258 }, { "epoch": 0.8721856598545202, "grad_norm": 0.9560432434082031, "learning_rate": 9.133841886269071e-06, "loss": 0.1566, "step": 1259 }, { "epoch": 0.8728784205057153, "grad_norm": 1.0317646265029907, "learning_rate": 9.133148404993066e-06, "loss": 0.1636, "step": 1260 }, { "epoch": 0.8735711811569102, "grad_norm": 1.1282131671905518, "learning_rate": 9.13245492371706e-06, "loss": 0.1798, "step": 1261 }, { "epoch": 0.8742639418081053, "grad_norm": 0.9957450032234192, "learning_rate": 9.131761442441054e-06, "loss": 0.1419, "step": 1262 }, { "epoch": 0.8749567024593004, "grad_norm": 1.069015383720398, "learning_rate": 9.13106796116505e-06, "loss": 0.1485, "step": 1263 }, { "epoch": 0.8756494631104953, "grad_norm": 1.1129701137542725, "learning_rate": 9.130374479889044e-06, "loss": 0.169, "step": 1264 }, { "epoch": 0.8763422237616904, "grad_norm": 1.0611717700958252, "learning_rate": 9.12968099861304e-06, "loss": 0.1422, "step": 1265 }, { "epoch": 0.8770349844128853, "grad_norm": 1.0714110136032104, "learning_rate": 9.128987517337032e-06, "loss": 0.1913, "step": 1266 }, { "epoch": 0.8777277450640804, "grad_norm": 1.039005160331726, "learning_rate": 9.128294036061027e-06, "loss": 0.1547, "step": 1267 }, { "epoch": 0.8784205057152754, "grad_norm": 1.1571332216262817, "learning_rate": 9.12760055478502e-06, "loss": 0.1821, "step": 1268 }, { "epoch": 0.8791132663664704, "grad_norm": 1.127876877784729, "learning_rate": 9.126907073509016e-06, "loss": 0.1656, "step": 1269 }, { "epoch": 0.8798060270176654, "grad_norm": 0.9981972575187683, "learning_rate": 9.12621359223301e-06, "loss": 0.1552, "step": 1270 }, { "epoch": 0.8804987876688604, "grad_norm": 1.0318617820739746, "learning_rate": 9.125520110957005e-06, "loss": 0.1624, "step": 1271 }, { "epoch": 0.8811915483200554, "grad_norm": 1.4229623079299927, "learning_rate": 9.124826629681e-06, "loss": 0.1844, "step": 1272 }, { "epoch": 0.8818843089712505, "grad_norm": 1.1267119646072388, "learning_rate": 9.124133148404994e-06, "loss": 0.2063, "step": 1273 }, { "epoch": 0.8825770696224454, "grad_norm": 1.0590894222259521, "learning_rate": 9.123439667128988e-06, "loss": 0.1354, "step": 1274 }, { "epoch": 0.8832698302736405, "grad_norm": 0.9794348478317261, "learning_rate": 9.122746185852983e-06, "loss": 0.176, "step": 1275 }, { "epoch": 0.8839625909248354, "grad_norm": 1.099520206451416, "learning_rate": 9.122052704576977e-06, "loss": 0.183, "step": 1276 }, { "epoch": 0.8846553515760305, "grad_norm": 1.0151985883712769, "learning_rate": 9.121359223300972e-06, "loss": 0.1597, "step": 1277 }, { "epoch": 0.8853481122272255, "grad_norm": 0.9931125640869141, "learning_rate": 9.120665742024965e-06, "loss": 0.1436, "step": 1278 }, { "epoch": 0.8860408728784205, "grad_norm": 0.897212028503418, "learning_rate": 9.11997226074896e-06, "loss": 0.133, "step": 1279 }, { "epoch": 0.8867336335296155, "grad_norm": 1.0599122047424316, "learning_rate": 9.119278779472955e-06, "loss": 0.1428, "step": 1280 }, { "epoch": 0.8874263941808105, "grad_norm": 1.0727603435516357, "learning_rate": 9.11858529819695e-06, "loss": 0.1512, "step": 1281 }, { "epoch": 0.8881191548320055, "grad_norm": 1.020111083984375, "learning_rate": 9.117891816920945e-06, "loss": 0.1463, "step": 1282 }, { "epoch": 0.8888119154832006, "grad_norm": 1.0650858879089355, "learning_rate": 9.117198335644938e-06, "loss": 0.1516, "step": 1283 }, { "epoch": 0.8895046761343955, "grad_norm": 0.9823405146598816, "learning_rate": 9.116504854368933e-06, "loss": 0.131, "step": 1284 }, { "epoch": 0.8901974367855906, "grad_norm": 1.0441343784332275, "learning_rate": 9.115811373092928e-06, "loss": 0.1512, "step": 1285 }, { "epoch": 0.8908901974367855, "grad_norm": 0.9891427159309387, "learning_rate": 9.115117891816921e-06, "loss": 0.109, "step": 1286 }, { "epoch": 0.8915829580879806, "grad_norm": 1.0274287462234497, "learning_rate": 9.114424410540916e-06, "loss": 0.1653, "step": 1287 }, { "epoch": 0.8922757187391757, "grad_norm": 1.041791558265686, "learning_rate": 9.11373092926491e-06, "loss": 0.154, "step": 1288 }, { "epoch": 0.8929684793903706, "grad_norm": 0.9982712268829346, "learning_rate": 9.113037447988906e-06, "loss": 0.1295, "step": 1289 }, { "epoch": 0.8936612400415657, "grad_norm": 1.0003913640975952, "learning_rate": 9.1123439667129e-06, "loss": 0.1516, "step": 1290 }, { "epoch": 0.8943540006927606, "grad_norm": 1.0033366680145264, "learning_rate": 9.111650485436894e-06, "loss": 0.1534, "step": 1291 }, { "epoch": 0.8950467613439557, "grad_norm": 1.1789953708648682, "learning_rate": 9.110957004160889e-06, "loss": 0.1808, "step": 1292 }, { "epoch": 0.8957395219951507, "grad_norm": 1.0715515613555908, "learning_rate": 9.110263522884882e-06, "loss": 0.1636, "step": 1293 }, { "epoch": 0.8964322826463457, "grad_norm": 0.8517835736274719, "learning_rate": 9.109570041608877e-06, "loss": 0.1375, "step": 1294 }, { "epoch": 0.8971250432975407, "grad_norm": 0.9976966381072998, "learning_rate": 9.108876560332872e-06, "loss": 0.1512, "step": 1295 }, { "epoch": 0.8978178039487357, "grad_norm": 1.0409802198410034, "learning_rate": 9.108183079056865e-06, "loss": 0.1633, "step": 1296 }, { "epoch": 0.8985105645999307, "grad_norm": 1.0344579219818115, "learning_rate": 9.10748959778086e-06, "loss": 0.1239, "step": 1297 }, { "epoch": 0.8992033252511258, "grad_norm": 1.0247353315353394, "learning_rate": 9.106796116504855e-06, "loss": 0.1605, "step": 1298 }, { "epoch": 0.8998960859023207, "grad_norm": 0.9276660680770874, "learning_rate": 9.10610263522885e-06, "loss": 0.1316, "step": 1299 }, { "epoch": 0.9005888465535158, "grad_norm": 1.0993589162826538, "learning_rate": 9.105409153952845e-06, "loss": 0.1622, "step": 1300 }, { "epoch": 0.9012816072047107, "grad_norm": 1.0584131479263306, "learning_rate": 9.104715672676838e-06, "loss": 0.1425, "step": 1301 }, { "epoch": 0.9019743678559058, "grad_norm": 1.1870489120483398, "learning_rate": 9.104022191400833e-06, "loss": 0.1862, "step": 1302 }, { "epoch": 0.9026671285071008, "grad_norm": 1.0090534687042236, "learning_rate": 9.103328710124826e-06, "loss": 0.1605, "step": 1303 }, { "epoch": 0.9033598891582958, "grad_norm": 0.8897512555122375, "learning_rate": 9.102635228848821e-06, "loss": 0.1438, "step": 1304 }, { "epoch": 0.9040526498094908, "grad_norm": 1.1026703119277954, "learning_rate": 9.101941747572816e-06, "loss": 0.1527, "step": 1305 }, { "epoch": 0.9047454104606858, "grad_norm": 0.974031925201416, "learning_rate": 9.101248266296811e-06, "loss": 0.1516, "step": 1306 }, { "epoch": 0.9054381711118809, "grad_norm": 1.025345802307129, "learning_rate": 9.100554785020806e-06, "loss": 0.1565, "step": 1307 }, { "epoch": 0.9061309317630759, "grad_norm": 1.143355131149292, "learning_rate": 9.0998613037448e-06, "loss": 0.1692, "step": 1308 }, { "epoch": 0.9068236924142709, "grad_norm": 1.1369552612304688, "learning_rate": 9.099167822468794e-06, "loss": 0.1573, "step": 1309 }, { "epoch": 0.9075164530654659, "grad_norm": 0.9756319522857666, "learning_rate": 9.098474341192789e-06, "loss": 0.1448, "step": 1310 }, { "epoch": 0.9082092137166609, "grad_norm": 1.089788556098938, "learning_rate": 9.097780859916782e-06, "loss": 0.1767, "step": 1311 }, { "epoch": 0.9089019743678559, "grad_norm": 0.9750568270683289, "learning_rate": 9.097087378640777e-06, "loss": 0.1628, "step": 1312 }, { "epoch": 0.909594735019051, "grad_norm": 0.8872171640396118, "learning_rate": 9.096393897364772e-06, "loss": 0.093, "step": 1313 }, { "epoch": 0.9102874956702459, "grad_norm": 1.1109753847122192, "learning_rate": 9.095700416088765e-06, "loss": 0.1749, "step": 1314 }, { "epoch": 0.910980256321441, "grad_norm": 0.9836993217468262, "learning_rate": 9.09500693481276e-06, "loss": 0.1632, "step": 1315 }, { "epoch": 0.9116730169726359, "grad_norm": 1.0990700721740723, "learning_rate": 9.094313453536755e-06, "loss": 0.1645, "step": 1316 }, { "epoch": 0.912365777623831, "grad_norm": 0.9555688500404358, "learning_rate": 9.09361997226075e-06, "loss": 0.136, "step": 1317 }, { "epoch": 0.913058538275026, "grad_norm": 1.057181477546692, "learning_rate": 9.092926490984744e-06, "loss": 0.1388, "step": 1318 }, { "epoch": 0.913751298926221, "grad_norm": 1.0919501781463623, "learning_rate": 9.092233009708738e-06, "loss": 0.146, "step": 1319 }, { "epoch": 0.914444059577416, "grad_norm": 0.9598453640937805, "learning_rate": 9.091539528432733e-06, "loss": 0.1565, "step": 1320 }, { "epoch": 0.915136820228611, "grad_norm": 0.9124870896339417, "learning_rate": 9.090846047156727e-06, "loss": 0.1346, "step": 1321 }, { "epoch": 0.915829580879806, "grad_norm": 1.0432202816009521, "learning_rate": 9.090152565880722e-06, "loss": 0.1416, "step": 1322 }, { "epoch": 0.9165223415310011, "grad_norm": 0.9280151128768921, "learning_rate": 9.089459084604716e-06, "loss": 0.1473, "step": 1323 }, { "epoch": 0.917215102182196, "grad_norm": 0.9922134876251221, "learning_rate": 9.088765603328711e-06, "loss": 0.1355, "step": 1324 }, { "epoch": 0.9179078628333911, "grad_norm": 0.9754739999771118, "learning_rate": 9.088072122052706e-06, "loss": 0.1455, "step": 1325 }, { "epoch": 0.918600623484586, "grad_norm": 1.052607774734497, "learning_rate": 9.0873786407767e-06, "loss": 0.1508, "step": 1326 }, { "epoch": 0.9192933841357811, "grad_norm": 1.0654557943344116, "learning_rate": 9.086685159500695e-06, "loss": 0.1437, "step": 1327 }, { "epoch": 0.919986144786976, "grad_norm": 1.0544451475143433, "learning_rate": 9.085991678224688e-06, "loss": 0.1565, "step": 1328 }, { "epoch": 0.9206789054381711, "grad_norm": 1.0749974250793457, "learning_rate": 9.085298196948683e-06, "loss": 0.1582, "step": 1329 }, { "epoch": 0.9213716660893662, "grad_norm": 0.9863892793655396, "learning_rate": 9.084604715672678e-06, "loss": 0.1438, "step": 1330 }, { "epoch": 0.9220644267405611, "grad_norm": 1.0883053541183472, "learning_rate": 9.083911234396673e-06, "loss": 0.1725, "step": 1331 }, { "epoch": 0.9227571873917562, "grad_norm": 0.9873329401016235, "learning_rate": 9.083217753120668e-06, "loss": 0.132, "step": 1332 }, { "epoch": 0.9234499480429511, "grad_norm": 1.089841365814209, "learning_rate": 9.08252427184466e-06, "loss": 0.1607, "step": 1333 }, { "epoch": 0.9241427086941462, "grad_norm": 1.1246720552444458, "learning_rate": 9.081830790568656e-06, "loss": 0.1873, "step": 1334 }, { "epoch": 0.9248354693453412, "grad_norm": 0.9264191389083862, "learning_rate": 9.08113730929265e-06, "loss": 0.1419, "step": 1335 }, { "epoch": 0.9255282299965362, "grad_norm": 1.1533194780349731, "learning_rate": 9.080443828016644e-06, "loss": 0.171, "step": 1336 }, { "epoch": 0.9262209906477312, "grad_norm": 1.1083037853240967, "learning_rate": 9.079750346740639e-06, "loss": 0.1362, "step": 1337 }, { "epoch": 0.9269137512989262, "grad_norm": 1.1557817459106445, "learning_rate": 9.079056865464632e-06, "loss": 0.1533, "step": 1338 }, { "epoch": 0.9276065119501212, "grad_norm": 0.9090328812599182, "learning_rate": 9.078363384188627e-06, "loss": 0.1521, "step": 1339 }, { "epoch": 0.9282992726013163, "grad_norm": 0.977802574634552, "learning_rate": 9.077669902912622e-06, "loss": 0.1684, "step": 1340 }, { "epoch": 0.9289920332525112, "grad_norm": 0.9049829244613647, "learning_rate": 9.076976421636617e-06, "loss": 0.138, "step": 1341 }, { "epoch": 0.9296847939037063, "grad_norm": 1.0639560222625732, "learning_rate": 9.076282940360612e-06, "loss": 0.1769, "step": 1342 }, { "epoch": 0.9303775545549012, "grad_norm": 0.9426189064979553, "learning_rate": 9.075589459084605e-06, "loss": 0.1313, "step": 1343 }, { "epoch": 0.9310703152060963, "grad_norm": 0.97137850522995, "learning_rate": 9.0748959778086e-06, "loss": 0.1614, "step": 1344 }, { "epoch": 0.9317630758572913, "grad_norm": 1.0211855173110962, "learning_rate": 9.074202496532595e-06, "loss": 0.1647, "step": 1345 }, { "epoch": 0.9324558365084863, "grad_norm": 0.9157480597496033, "learning_rate": 9.073509015256588e-06, "loss": 0.1517, "step": 1346 }, { "epoch": 0.9331485971596813, "grad_norm": 1.0401955842971802, "learning_rate": 9.072815533980583e-06, "loss": 0.1677, "step": 1347 }, { "epoch": 0.9338413578108763, "grad_norm": 1.2260417938232422, "learning_rate": 9.072122052704578e-06, "loss": 0.1758, "step": 1348 }, { "epoch": 0.9345341184620714, "grad_norm": 1.0605547428131104, "learning_rate": 9.071428571428573e-06, "loss": 0.1495, "step": 1349 }, { "epoch": 0.9352268791132664, "grad_norm": 0.9659693241119385, "learning_rate": 9.070735090152568e-06, "loss": 0.1332, "step": 1350 }, { "epoch": 0.9359196397644614, "grad_norm": 0.9812120199203491, "learning_rate": 9.070041608876561e-06, "loss": 0.1333, "step": 1351 }, { "epoch": 0.9366124004156564, "grad_norm": 0.9992308616638184, "learning_rate": 9.069348127600556e-06, "loss": 0.1607, "step": 1352 }, { "epoch": 0.9373051610668514, "grad_norm": 1.1059376001358032, "learning_rate": 9.06865464632455e-06, "loss": 0.1627, "step": 1353 }, { "epoch": 0.9379979217180464, "grad_norm": 1.1383405923843384, "learning_rate": 9.067961165048544e-06, "loss": 0.1438, "step": 1354 }, { "epoch": 0.9386906823692415, "grad_norm": 1.1224113702774048, "learning_rate": 9.067267683772539e-06, "loss": 0.1716, "step": 1355 }, { "epoch": 0.9393834430204364, "grad_norm": 1.0540070533752441, "learning_rate": 9.066574202496532e-06, "loss": 0.1475, "step": 1356 }, { "epoch": 0.9400762036716315, "grad_norm": 0.9050190448760986, "learning_rate": 9.065880721220527e-06, "loss": 0.1358, "step": 1357 }, { "epoch": 0.9407689643228264, "grad_norm": 0.995415985584259, "learning_rate": 9.065187239944522e-06, "loss": 0.1481, "step": 1358 }, { "epoch": 0.9414617249740215, "grad_norm": 1.6251403093338013, "learning_rate": 9.064493758668517e-06, "loss": 0.1532, "step": 1359 }, { "epoch": 0.9421544856252165, "grad_norm": 1.0649985074996948, "learning_rate": 9.063800277392512e-06, "loss": 0.1583, "step": 1360 }, { "epoch": 0.9428472462764115, "grad_norm": 1.1055551767349243, "learning_rate": 9.063106796116505e-06, "loss": 0.1727, "step": 1361 }, { "epoch": 0.9435400069276065, "grad_norm": 1.0746663808822632, "learning_rate": 9.0624133148405e-06, "loss": 0.1673, "step": 1362 }, { "epoch": 0.9442327675788015, "grad_norm": 0.9959187507629395, "learning_rate": 9.061719833564494e-06, "loss": 0.1528, "step": 1363 }, { "epoch": 0.9449255282299965, "grad_norm": 1.0603806972503662, "learning_rate": 9.061026352288488e-06, "loss": 0.1448, "step": 1364 }, { "epoch": 0.9456182888811916, "grad_norm": 1.0264545679092407, "learning_rate": 9.060332871012483e-06, "loss": 0.1269, "step": 1365 }, { "epoch": 0.9463110495323865, "grad_norm": 1.0569376945495605, "learning_rate": 9.059639389736478e-06, "loss": 0.16, "step": 1366 }, { "epoch": 0.9470038101835816, "grad_norm": 0.9829663038253784, "learning_rate": 9.058945908460473e-06, "loss": 0.1507, "step": 1367 }, { "epoch": 0.9476965708347765, "grad_norm": 1.0312482118606567, "learning_rate": 9.058252427184466e-06, "loss": 0.1346, "step": 1368 }, { "epoch": 0.9483893314859716, "grad_norm": 1.0962169170379639, "learning_rate": 9.057558945908461e-06, "loss": 0.1874, "step": 1369 }, { "epoch": 0.9490820921371667, "grad_norm": 1.0141427516937256, "learning_rate": 9.056865464632456e-06, "loss": 0.1431, "step": 1370 }, { "epoch": 0.9497748527883616, "grad_norm": 1.0795923471450806, "learning_rate": 9.05617198335645e-06, "loss": 0.1631, "step": 1371 }, { "epoch": 0.9504676134395567, "grad_norm": 0.9833242297172546, "learning_rate": 9.055478502080445e-06, "loss": 0.1337, "step": 1372 }, { "epoch": 0.9511603740907516, "grad_norm": 1.0700855255126953, "learning_rate": 9.054785020804438e-06, "loss": 0.1398, "step": 1373 }, { "epoch": 0.9518531347419467, "grad_norm": 0.9360601902008057, "learning_rate": 9.054091539528433e-06, "loss": 0.1494, "step": 1374 }, { "epoch": 0.9525458953931417, "grad_norm": 1.0651384592056274, "learning_rate": 9.053398058252428e-06, "loss": 0.1443, "step": 1375 }, { "epoch": 0.9532386560443367, "grad_norm": 1.0867021083831787, "learning_rate": 9.052704576976423e-06, "loss": 0.1655, "step": 1376 }, { "epoch": 0.9539314166955317, "grad_norm": 1.0285042524337769, "learning_rate": 9.052011095700417e-06, "loss": 0.15, "step": 1377 }, { "epoch": 0.9546241773467267, "grad_norm": 1.0461286306381226, "learning_rate": 9.05131761442441e-06, "loss": 0.1586, "step": 1378 }, { "epoch": 0.9553169379979217, "grad_norm": 1.0037174224853516, "learning_rate": 9.050624133148406e-06, "loss": 0.1109, "step": 1379 }, { "epoch": 0.9560096986491168, "grad_norm": 0.9715021848678589, "learning_rate": 9.0499306518724e-06, "loss": 0.1604, "step": 1380 }, { "epoch": 0.9567024593003117, "grad_norm": 1.1860789060592651, "learning_rate": 9.049237170596394e-06, "loss": 0.1528, "step": 1381 }, { "epoch": 0.9573952199515068, "grad_norm": 0.952221155166626, "learning_rate": 9.048543689320389e-06, "loss": 0.1345, "step": 1382 }, { "epoch": 0.9580879806027017, "grad_norm": 1.0256909132003784, "learning_rate": 9.047850208044384e-06, "loss": 0.1694, "step": 1383 }, { "epoch": 0.9587807412538968, "grad_norm": 1.0124690532684326, "learning_rate": 9.047156726768379e-06, "loss": 0.1369, "step": 1384 }, { "epoch": 0.9594735019050918, "grad_norm": 1.1516591310501099, "learning_rate": 9.046463245492374e-06, "loss": 0.1482, "step": 1385 }, { "epoch": 0.9601662625562868, "grad_norm": 1.0505039691925049, "learning_rate": 9.045769764216367e-06, "loss": 0.1585, "step": 1386 }, { "epoch": 0.9608590232074818, "grad_norm": 1.0213027000427246, "learning_rate": 9.045076282940362e-06, "loss": 0.1236, "step": 1387 }, { "epoch": 0.9615517838586768, "grad_norm": 0.9856483936309814, "learning_rate": 9.044382801664355e-06, "loss": 0.133, "step": 1388 }, { "epoch": 0.9622445445098718, "grad_norm": 1.0869338512420654, "learning_rate": 9.04368932038835e-06, "loss": 0.1758, "step": 1389 }, { "epoch": 0.9629373051610669, "grad_norm": 0.9334696531295776, "learning_rate": 9.042995839112345e-06, "loss": 0.1199, "step": 1390 }, { "epoch": 0.9636300658122618, "grad_norm": 1.0358421802520752, "learning_rate": 9.042302357836338e-06, "loss": 0.1334, "step": 1391 }, { "epoch": 0.9643228264634569, "grad_norm": 1.0631022453308105, "learning_rate": 9.041608876560333e-06, "loss": 0.1376, "step": 1392 }, { "epoch": 0.9650155871146519, "grad_norm": 0.982193112373352, "learning_rate": 9.040915395284328e-06, "loss": 0.1393, "step": 1393 }, { "epoch": 0.9657083477658469, "grad_norm": 0.9237812161445618, "learning_rate": 9.040221914008323e-06, "loss": 0.1341, "step": 1394 }, { "epoch": 0.966401108417042, "grad_norm": 0.9706209897994995, "learning_rate": 9.039528432732318e-06, "loss": 0.1439, "step": 1395 }, { "epoch": 0.9670938690682369, "grad_norm": 0.952503502368927, "learning_rate": 9.038834951456311e-06, "loss": 0.1498, "step": 1396 }, { "epoch": 0.967786629719432, "grad_norm": 1.0859705209732056, "learning_rate": 9.038141470180306e-06, "loss": 0.1552, "step": 1397 }, { "epoch": 0.9684793903706269, "grad_norm": 1.0285351276397705, "learning_rate": 9.0374479889043e-06, "loss": 0.1636, "step": 1398 }, { "epoch": 0.969172151021822, "grad_norm": 1.1443272829055786, "learning_rate": 9.036754507628294e-06, "loss": 0.1717, "step": 1399 }, { "epoch": 0.969864911673017, "grad_norm": 1.1059259176254272, "learning_rate": 9.036061026352289e-06, "loss": 0.1876, "step": 1400 }, { "epoch": 0.970557672324212, "grad_norm": 1.0529000759124756, "learning_rate": 9.035367545076284e-06, "loss": 0.1507, "step": 1401 }, { "epoch": 0.971250432975407, "grad_norm": 1.1878215074539185, "learning_rate": 9.034674063800279e-06, "loss": 0.1675, "step": 1402 }, { "epoch": 0.971943193626602, "grad_norm": 1.0230395793914795, "learning_rate": 9.033980582524272e-06, "loss": 0.1408, "step": 1403 }, { "epoch": 0.972635954277797, "grad_norm": 0.9114059805870056, "learning_rate": 9.033287101248267e-06, "loss": 0.155, "step": 1404 }, { "epoch": 0.973328714928992, "grad_norm": 0.9873482584953308, "learning_rate": 9.032593619972262e-06, "loss": 0.143, "step": 1405 }, { "epoch": 0.974021475580187, "grad_norm": 0.9903661012649536, "learning_rate": 9.031900138696255e-06, "loss": 0.1368, "step": 1406 }, { "epoch": 0.9747142362313821, "grad_norm": 0.9592527747154236, "learning_rate": 9.03120665742025e-06, "loss": 0.14, "step": 1407 }, { "epoch": 0.975406996882577, "grad_norm": 0.9270055294036865, "learning_rate": 9.030513176144245e-06, "loss": 0.1252, "step": 1408 }, { "epoch": 0.9760997575337721, "grad_norm": 1.0739645957946777, "learning_rate": 9.02981969486824e-06, "loss": 0.1686, "step": 1409 }, { "epoch": 0.976792518184967, "grad_norm": 0.9901601076126099, "learning_rate": 9.029126213592233e-06, "loss": 0.1307, "step": 1410 }, { "epoch": 0.9774852788361621, "grad_norm": 1.122162103652954, "learning_rate": 9.028432732316228e-06, "loss": 0.1714, "step": 1411 }, { "epoch": 0.9781780394873572, "grad_norm": 1.008817195892334, "learning_rate": 9.027739251040223e-06, "loss": 0.161, "step": 1412 }, { "epoch": 0.9788708001385521, "grad_norm": 1.0328139066696167, "learning_rate": 9.027045769764216e-06, "loss": 0.1472, "step": 1413 }, { "epoch": 0.9795635607897472, "grad_norm": 1.090971827507019, "learning_rate": 9.026352288488211e-06, "loss": 0.1416, "step": 1414 }, { "epoch": 0.9802563214409421, "grad_norm": 0.9473955035209656, "learning_rate": 9.025658807212206e-06, "loss": 0.141, "step": 1415 }, { "epoch": 0.9809490820921372, "grad_norm": 0.8541167378425598, "learning_rate": 9.0249653259362e-06, "loss": 0.1232, "step": 1416 }, { "epoch": 0.9816418427433322, "grad_norm": 0.8578402400016785, "learning_rate": 9.024271844660194e-06, "loss": 0.1119, "step": 1417 }, { "epoch": 0.9823346033945272, "grad_norm": 0.9830331206321716, "learning_rate": 9.02357836338419e-06, "loss": 0.136, "step": 1418 }, { "epoch": 0.9830273640457222, "grad_norm": 1.096903681755066, "learning_rate": 9.022884882108184e-06, "loss": 0.1689, "step": 1419 }, { "epoch": 0.9837201246969172, "grad_norm": 0.9177114367485046, "learning_rate": 9.02219140083218e-06, "loss": 0.1104, "step": 1420 }, { "epoch": 0.9844128853481122, "grad_norm": 0.9560793042182922, "learning_rate": 9.021497919556173e-06, "loss": 0.1328, "step": 1421 }, { "epoch": 0.9851056459993073, "grad_norm": 1.094313383102417, "learning_rate": 9.020804438280167e-06, "loss": 0.1503, "step": 1422 }, { "epoch": 0.9857984066505022, "grad_norm": 1.0043598413467407, "learning_rate": 9.02011095700416e-06, "loss": 0.1547, "step": 1423 }, { "epoch": 0.9864911673016973, "grad_norm": 0.963378369808197, "learning_rate": 9.019417475728156e-06, "loss": 0.1303, "step": 1424 }, { "epoch": 0.9871839279528922, "grad_norm": 1.1278101205825806, "learning_rate": 9.01872399445215e-06, "loss": 0.1754, "step": 1425 }, { "epoch": 0.9878766886040873, "grad_norm": 1.0143046379089355, "learning_rate": 9.018030513176146e-06, "loss": 0.1554, "step": 1426 }, { "epoch": 0.9885694492552823, "grad_norm": 1.0028659105300903, "learning_rate": 9.01733703190014e-06, "loss": 0.1473, "step": 1427 }, { "epoch": 0.9892622099064773, "grad_norm": 1.020531177520752, "learning_rate": 9.016643550624134e-06, "loss": 0.1401, "step": 1428 }, { "epoch": 0.9899549705576723, "grad_norm": 0.9271031022071838, "learning_rate": 9.015950069348129e-06, "loss": 0.1372, "step": 1429 }, { "epoch": 0.9906477312088673, "grad_norm": 1.1084705591201782, "learning_rate": 9.015256588072124e-06, "loss": 0.1726, "step": 1430 }, { "epoch": 0.9913404918600623, "grad_norm": 0.9905039072036743, "learning_rate": 9.014563106796117e-06, "loss": 0.1245, "step": 1431 }, { "epoch": 0.9920332525112574, "grad_norm": 1.114532470703125, "learning_rate": 9.013869625520112e-06, "loss": 0.1773, "step": 1432 }, { "epoch": 0.9927260131624523, "grad_norm": 0.9326059818267822, "learning_rate": 9.013176144244105e-06, "loss": 0.1575, "step": 1433 }, { "epoch": 0.9934187738136474, "grad_norm": 1.106297254562378, "learning_rate": 9.0124826629681e-06, "loss": 0.1667, "step": 1434 }, { "epoch": 0.9941115344648424, "grad_norm": 1.1321109533309937, "learning_rate": 9.011789181692095e-06, "loss": 0.1443, "step": 1435 }, { "epoch": 0.9948042951160374, "grad_norm": 0.9964682459831238, "learning_rate": 9.01109570041609e-06, "loss": 0.1427, "step": 1436 }, { "epoch": 0.9954970557672325, "grad_norm": 1.0049324035644531, "learning_rate": 9.010402219140085e-06, "loss": 0.142, "step": 1437 }, { "epoch": 0.9961898164184274, "grad_norm": 1.120507001876831, "learning_rate": 9.009708737864078e-06, "loss": 0.1791, "step": 1438 }, { "epoch": 0.9968825770696225, "grad_norm": 1.191520094871521, "learning_rate": 9.009015256588073e-06, "loss": 0.1568, "step": 1439 }, { "epoch": 0.9975753377208174, "grad_norm": 1.0339500904083252, "learning_rate": 9.008321775312068e-06, "loss": 0.1435, "step": 1440 }, { "epoch": 0.9982680983720125, "grad_norm": 1.1104273796081543, "learning_rate": 9.007628294036061e-06, "loss": 0.1516, "step": 1441 }, { "epoch": 0.9989608590232075, "grad_norm": 1.2329273223876953, "learning_rate": 9.006934812760056e-06, "loss": 0.1517, "step": 1442 }, { "epoch": 0.9996536196744025, "grad_norm": 1.001638650894165, "learning_rate": 9.006241331484051e-06, "loss": 0.1419, "step": 1443 }, { "epoch": 0.9996536196744025, "eval_loss": 0.25841060280799866, "eval_runtime": 7627.0462, "eval_samples_per_second": 1.049, "eval_steps_per_second": 0.033, "eval_wer": 14.441454977462525, "step": 1443 }, { "epoch": 1.0003463803255974, "grad_norm": 0.8515987992286682, "learning_rate": 9.005547850208046e-06, "loss": 0.1159, "step": 1444 }, { "epoch": 1.0010391409767925, "grad_norm": 0.6200939416885376, "learning_rate": 9.00485436893204e-06, "loss": 0.0749, "step": 1445 }, { "epoch": 1.0017319016279875, "grad_norm": 0.7645689249038696, "learning_rate": 9.004160887656034e-06, "loss": 0.1186, "step": 1446 }, { "epoch": 1.0024246622791826, "grad_norm": 0.8087880611419678, "learning_rate": 9.003467406380029e-06, "loss": 0.1118, "step": 1447 }, { "epoch": 1.0031174229303776, "grad_norm": 0.8951125741004944, "learning_rate": 9.002773925104022e-06, "loss": 0.1138, "step": 1448 }, { "epoch": 1.0038101835815725, "grad_norm": 0.8107717037200928, "learning_rate": 9.002080443828017e-06, "loss": 0.1061, "step": 1449 }, { "epoch": 1.0045029442327675, "grad_norm": 0.6840798258781433, "learning_rate": 9.001386962552012e-06, "loss": 0.0901, "step": 1450 }, { "epoch": 1.0051957048839626, "grad_norm": 0.7962857484817505, "learning_rate": 9.000693481276005e-06, "loss": 0.0908, "step": 1451 }, { "epoch": 1.0058884655351576, "grad_norm": 0.7427263855934143, "learning_rate": 9e-06, "loss": 0.091, "step": 1452 }, { "epoch": 1.0065812261863527, "grad_norm": 0.9398947954177856, "learning_rate": 8.999306518723995e-06, "loss": 0.1119, "step": 1453 }, { "epoch": 1.0072739868375475, "grad_norm": 0.801214873790741, "learning_rate": 8.99861303744799e-06, "loss": 0.1067, "step": 1454 }, { "epoch": 1.0079667474887426, "grad_norm": 0.7821488976478577, "learning_rate": 8.997919556171985e-06, "loss": 0.1015, "step": 1455 }, { "epoch": 1.0086595081399377, "grad_norm": 0.9047034382820129, "learning_rate": 8.997226074895978e-06, "loss": 0.1166, "step": 1456 }, { "epoch": 1.0093522687911327, "grad_norm": 1.458219051361084, "learning_rate": 8.996532593619973e-06, "loss": 0.0968, "step": 1457 }, { "epoch": 1.0100450294423278, "grad_norm": 0.8063825964927673, "learning_rate": 8.995839112343966e-06, "loss": 0.0941, "step": 1458 }, { "epoch": 1.0107377900935226, "grad_norm": 0.8319393396377563, "learning_rate": 8.995145631067961e-06, "loss": 0.0862, "step": 1459 }, { "epoch": 1.0114305507447177, "grad_norm": 0.9785870909690857, "learning_rate": 8.994452149791956e-06, "loss": 0.1093, "step": 1460 }, { "epoch": 1.0121233113959127, "grad_norm": 0.8586106300354004, "learning_rate": 8.993758668515951e-06, "loss": 0.102, "step": 1461 }, { "epoch": 1.0128160720471078, "grad_norm": 0.8335251808166504, "learning_rate": 8.993065187239946e-06, "loss": 0.1003, "step": 1462 }, { "epoch": 1.0135088326983028, "grad_norm": 0.8018205165863037, "learning_rate": 8.99237170596394e-06, "loss": 0.0983, "step": 1463 }, { "epoch": 1.0142015933494977, "grad_norm": 0.824069619178772, "learning_rate": 8.991678224687934e-06, "loss": 0.1018, "step": 1464 }, { "epoch": 1.0148943540006927, "grad_norm": 0.8895369172096252, "learning_rate": 8.99098474341193e-06, "loss": 0.0986, "step": 1465 }, { "epoch": 1.0155871146518878, "grad_norm": 0.9495205879211426, "learning_rate": 8.990291262135923e-06, "loss": 0.111, "step": 1466 }, { "epoch": 1.0162798753030828, "grad_norm": 0.823100209236145, "learning_rate": 8.989597780859917e-06, "loss": 0.1117, "step": 1467 }, { "epoch": 1.016972635954278, "grad_norm": 0.7357906103134155, "learning_rate": 8.98890429958391e-06, "loss": 0.0822, "step": 1468 }, { "epoch": 1.0176653966054727, "grad_norm": 0.7708436846733093, "learning_rate": 8.988210818307906e-06, "loss": 0.0922, "step": 1469 }, { "epoch": 1.0183581572566678, "grad_norm": 0.8186256885528564, "learning_rate": 8.9875173370319e-06, "loss": 0.0869, "step": 1470 }, { "epoch": 1.0190509179078628, "grad_norm": 0.8974500894546509, "learning_rate": 8.986823855755895e-06, "loss": 0.1017, "step": 1471 }, { "epoch": 1.019743678559058, "grad_norm": 0.9176140427589417, "learning_rate": 8.98613037447989e-06, "loss": 0.1124, "step": 1472 }, { "epoch": 1.020436439210253, "grad_norm": 0.7861721515655518, "learning_rate": 8.985436893203884e-06, "loss": 0.0885, "step": 1473 }, { "epoch": 1.0211291998614478, "grad_norm": 0.8407741189002991, "learning_rate": 8.984743411927879e-06, "loss": 0.0964, "step": 1474 }, { "epoch": 1.0218219605126428, "grad_norm": 0.7929752469062805, "learning_rate": 8.984049930651874e-06, "loss": 0.0907, "step": 1475 }, { "epoch": 1.022514721163838, "grad_norm": 0.8151705265045166, "learning_rate": 8.983356449375867e-06, "loss": 0.0844, "step": 1476 }, { "epoch": 1.023207481815033, "grad_norm": 0.9558108448982239, "learning_rate": 8.982662968099862e-06, "loss": 0.1124, "step": 1477 }, { "epoch": 1.023900242466228, "grad_norm": 0.9324179291725159, "learning_rate": 8.981969486823857e-06, "loss": 0.1119, "step": 1478 }, { "epoch": 1.0245930031174229, "grad_norm": 0.8692248463630676, "learning_rate": 8.981276005547852e-06, "loss": 0.1018, "step": 1479 }, { "epoch": 1.025285763768618, "grad_norm": 0.80369633436203, "learning_rate": 8.980582524271847e-06, "loss": 0.0885, "step": 1480 }, { "epoch": 1.025978524419813, "grad_norm": 0.779391884803772, "learning_rate": 8.97988904299584e-06, "loss": 0.0921, "step": 1481 }, { "epoch": 1.026671285071008, "grad_norm": 0.7021351456642151, "learning_rate": 8.979195561719835e-06, "loss": 0.0738, "step": 1482 }, { "epoch": 1.027364045722203, "grad_norm": 0.7086558938026428, "learning_rate": 8.978502080443828e-06, "loss": 0.0689, "step": 1483 }, { "epoch": 1.028056806373398, "grad_norm": 0.7233958840370178, "learning_rate": 8.977808599167823e-06, "loss": 0.0841, "step": 1484 }, { "epoch": 1.028749567024593, "grad_norm": 0.8533040881156921, "learning_rate": 8.977115117891818e-06, "loss": 0.1049, "step": 1485 }, { "epoch": 1.029442327675788, "grad_norm": 0.8435163497924805, "learning_rate": 8.976421636615813e-06, "loss": 0.0876, "step": 1486 }, { "epoch": 1.030135088326983, "grad_norm": 0.843229353427887, "learning_rate": 8.975728155339806e-06, "loss": 0.0919, "step": 1487 }, { "epoch": 1.0308278489781781, "grad_norm": 0.8355031609535217, "learning_rate": 8.975034674063801e-06, "loss": 0.1028, "step": 1488 }, { "epoch": 1.031520609629373, "grad_norm": 0.9322400093078613, "learning_rate": 8.974341192787796e-06, "loss": 0.0955, "step": 1489 }, { "epoch": 1.032213370280568, "grad_norm": 0.7547881603240967, "learning_rate": 8.97364771151179e-06, "loss": 0.0795, "step": 1490 }, { "epoch": 1.032906130931763, "grad_norm": 0.8543777465820312, "learning_rate": 8.972954230235784e-06, "loss": 0.0909, "step": 1491 }, { "epoch": 1.0335988915829581, "grad_norm": 0.8199926614761353, "learning_rate": 8.972260748959779e-06, "loss": 0.0874, "step": 1492 }, { "epoch": 1.0342916522341532, "grad_norm": 0.9519786238670349, "learning_rate": 8.971567267683772e-06, "loss": 0.1191, "step": 1493 }, { "epoch": 1.034984412885348, "grad_norm": 0.7860363125801086, "learning_rate": 8.970873786407767e-06, "loss": 0.0838, "step": 1494 }, { "epoch": 1.035677173536543, "grad_norm": 0.703731894493103, "learning_rate": 8.970180305131762e-06, "loss": 0.085, "step": 1495 }, { "epoch": 1.0363699341877382, "grad_norm": 0.8515883088111877, "learning_rate": 8.969486823855757e-06, "loss": 0.0977, "step": 1496 }, { "epoch": 1.0370626948389332, "grad_norm": 0.6951834559440613, "learning_rate": 8.968793342579752e-06, "loss": 0.0806, "step": 1497 }, { "epoch": 1.0377554554901283, "grad_norm": 0.852260172367096, "learning_rate": 8.968099861303745e-06, "loss": 0.0939, "step": 1498 }, { "epoch": 1.038448216141323, "grad_norm": 0.8648838996887207, "learning_rate": 8.96740638002774e-06, "loss": 0.1255, "step": 1499 }, { "epoch": 1.0391409767925182, "grad_norm": 0.9019761681556702, "learning_rate": 8.966712898751735e-06, "loss": 0.1007, "step": 1500 }, { "epoch": 1.0398337374437132, "grad_norm": 0.8064924478530884, "learning_rate": 8.966019417475728e-06, "loss": 0.1192, "step": 1501 }, { "epoch": 1.0405264980949083, "grad_norm": 0.8667044043540955, "learning_rate": 8.965325936199723e-06, "loss": 0.0965, "step": 1502 }, { "epoch": 1.0412192587461033, "grad_norm": 0.7102158069610596, "learning_rate": 8.964632454923718e-06, "loss": 0.0817, "step": 1503 }, { "epoch": 1.0419120193972982, "grad_norm": 0.817099928855896, "learning_rate": 8.963938973647713e-06, "loss": 0.0991, "step": 1504 }, { "epoch": 1.0426047800484932, "grad_norm": 0.9012547135353088, "learning_rate": 8.963245492371708e-06, "loss": 0.0949, "step": 1505 }, { "epoch": 1.0432975406996883, "grad_norm": 0.7682470679283142, "learning_rate": 8.962552011095701e-06, "loss": 0.0781, "step": 1506 }, { "epoch": 1.0439903013508833, "grad_norm": 0.7825487852096558, "learning_rate": 8.961858529819696e-06, "loss": 0.0961, "step": 1507 }, { "epoch": 1.0446830620020784, "grad_norm": 0.7672064900398254, "learning_rate": 8.96116504854369e-06, "loss": 0.088, "step": 1508 }, { "epoch": 1.0453758226532732, "grad_norm": 0.7983643412590027, "learning_rate": 8.960471567267684e-06, "loss": 0.087, "step": 1509 }, { "epoch": 1.0460685833044683, "grad_norm": 0.9235342144966125, "learning_rate": 8.95977808599168e-06, "loss": 0.0993, "step": 1510 }, { "epoch": 1.0467613439556633, "grad_norm": 0.8425189852714539, "learning_rate": 8.959084604715673e-06, "loss": 0.093, "step": 1511 }, { "epoch": 1.0474541046068584, "grad_norm": 0.875891923904419, "learning_rate": 8.958391123439667e-06, "loss": 0.0995, "step": 1512 }, { "epoch": 1.0481468652580532, "grad_norm": 0.8321284055709839, "learning_rate": 8.957697642163662e-06, "loss": 0.0837, "step": 1513 }, { "epoch": 1.0488396259092483, "grad_norm": 1.001866340637207, "learning_rate": 8.957004160887657e-06, "loss": 0.0948, "step": 1514 }, { "epoch": 1.0495323865604433, "grad_norm": 0.9124066233634949, "learning_rate": 8.956310679611652e-06, "loss": 0.0972, "step": 1515 }, { "epoch": 1.0502251472116384, "grad_norm": 0.7963526844978333, "learning_rate": 8.955617198335645e-06, "loss": 0.0929, "step": 1516 }, { "epoch": 1.0509179078628335, "grad_norm": 0.7871224880218506, "learning_rate": 8.95492371705964e-06, "loss": 0.0979, "step": 1517 }, { "epoch": 1.0516106685140283, "grad_norm": 0.9203463792800903, "learning_rate": 8.954230235783634e-06, "loss": 0.0992, "step": 1518 }, { "epoch": 1.0523034291652233, "grad_norm": 0.7667852640151978, "learning_rate": 8.953536754507629e-06, "loss": 0.081, "step": 1519 }, { "epoch": 1.0529961898164184, "grad_norm": 0.912794291973114, "learning_rate": 8.952843273231624e-06, "loss": 0.0986, "step": 1520 }, { "epoch": 1.0536889504676135, "grad_norm": 0.7972784042358398, "learning_rate": 8.952149791955618e-06, "loss": 0.0957, "step": 1521 }, { "epoch": 1.0543817111188085, "grad_norm": 0.8647158741950989, "learning_rate": 8.951456310679613e-06, "loss": 0.092, "step": 1522 }, { "epoch": 1.0550744717700034, "grad_norm": 0.8256508111953735, "learning_rate": 8.950762829403607e-06, "loss": 0.0988, "step": 1523 }, { "epoch": 1.0557672324211984, "grad_norm": 0.7755040526390076, "learning_rate": 8.950069348127602e-06, "loss": 0.0925, "step": 1524 }, { "epoch": 1.0564599930723935, "grad_norm": 0.8472358584403992, "learning_rate": 8.949375866851596e-06, "loss": 0.1258, "step": 1525 }, { "epoch": 1.0571527537235885, "grad_norm": 0.8066132068634033, "learning_rate": 8.94868238557559e-06, "loss": 0.0856, "step": 1526 }, { "epoch": 1.0578455143747836, "grad_norm": 0.8940655589103699, "learning_rate": 8.947988904299585e-06, "loss": 0.1053, "step": 1527 }, { "epoch": 1.0585382750259784, "grad_norm": 0.8073523640632629, "learning_rate": 8.947295423023578e-06, "loss": 0.0955, "step": 1528 }, { "epoch": 1.0592310356771735, "grad_norm": 0.853069007396698, "learning_rate": 8.946601941747573e-06, "loss": 0.0928, "step": 1529 }, { "epoch": 1.0599237963283685, "grad_norm": 0.8059660196304321, "learning_rate": 8.945908460471568e-06, "loss": 0.101, "step": 1530 }, { "epoch": 1.0606165569795636, "grad_norm": 0.8703159689903259, "learning_rate": 8.945214979195563e-06, "loss": 0.1047, "step": 1531 }, { "epoch": 1.0613093176307586, "grad_norm": 0.7334396243095398, "learning_rate": 8.944521497919558e-06, "loss": 0.0834, "step": 1532 }, { "epoch": 1.0620020782819535, "grad_norm": 0.8599957823753357, "learning_rate": 8.943828016643551e-06, "loss": 0.107, "step": 1533 }, { "epoch": 1.0626948389331485, "grad_norm": 0.7433030605316162, "learning_rate": 8.943134535367546e-06, "loss": 0.0931, "step": 1534 }, { "epoch": 1.0633875995843436, "grad_norm": 1.012636423110962, "learning_rate": 8.94244105409154e-06, "loss": 0.1233, "step": 1535 }, { "epoch": 1.0640803602355386, "grad_norm": 0.7831315994262695, "learning_rate": 8.941747572815534e-06, "loss": 0.0896, "step": 1536 }, { "epoch": 1.0647731208867337, "grad_norm": 0.8660885691642761, "learning_rate": 8.941054091539529e-06, "loss": 0.1041, "step": 1537 }, { "epoch": 1.0654658815379285, "grad_norm": 0.6929388046264648, "learning_rate": 8.940360610263524e-06, "loss": 0.0682, "step": 1538 }, { "epoch": 1.0661586421891236, "grad_norm": 0.750199556350708, "learning_rate": 8.939667128987519e-06, "loss": 0.0919, "step": 1539 }, { "epoch": 1.0668514028403187, "grad_norm": 0.7908627390861511, "learning_rate": 8.938973647711514e-06, "loss": 0.0901, "step": 1540 }, { "epoch": 1.0675441634915137, "grad_norm": 0.874664306640625, "learning_rate": 8.938280166435507e-06, "loss": 0.1096, "step": 1541 }, { "epoch": 1.0682369241427088, "grad_norm": 0.8638962507247925, "learning_rate": 8.937586685159502e-06, "loss": 0.0977, "step": 1542 }, { "epoch": 1.0689296847939036, "grad_norm": 0.7941625118255615, "learning_rate": 8.936893203883495e-06, "loss": 0.0892, "step": 1543 }, { "epoch": 1.0696224454450987, "grad_norm": 0.7659174799919128, "learning_rate": 8.93619972260749e-06, "loss": 0.0772, "step": 1544 }, { "epoch": 1.0703152060962937, "grad_norm": 0.778826117515564, "learning_rate": 8.935506241331485e-06, "loss": 0.0953, "step": 1545 }, { "epoch": 1.0710079667474888, "grad_norm": 0.8357170224189758, "learning_rate": 8.934812760055478e-06, "loss": 0.1054, "step": 1546 }, { "epoch": 1.0717007273986838, "grad_norm": 0.8270371556282043, "learning_rate": 8.934119278779473e-06, "loss": 0.0939, "step": 1547 }, { "epoch": 1.0723934880498787, "grad_norm": 0.8877065777778625, "learning_rate": 8.933425797503468e-06, "loss": 0.0991, "step": 1548 }, { "epoch": 1.0730862487010737, "grad_norm": 0.9467287063598633, "learning_rate": 8.932732316227463e-06, "loss": 0.0973, "step": 1549 }, { "epoch": 1.0737790093522688, "grad_norm": 0.8550165891647339, "learning_rate": 8.932038834951458e-06, "loss": 0.1029, "step": 1550 }, { "epoch": 1.0744717700034638, "grad_norm": 0.7602420449256897, "learning_rate": 8.931345353675451e-06, "loss": 0.1005, "step": 1551 }, { "epoch": 1.075164530654659, "grad_norm": 0.9009706974029541, "learning_rate": 8.930651872399446e-06, "loss": 0.1016, "step": 1552 }, { "epoch": 1.0758572913058537, "grad_norm": 0.8762458562850952, "learning_rate": 8.92995839112344e-06, "loss": 0.0928, "step": 1553 }, { "epoch": 1.0765500519570488, "grad_norm": 0.8292327523231506, "learning_rate": 8.929264909847434e-06, "loss": 0.1012, "step": 1554 }, { "epoch": 1.0772428126082438, "grad_norm": 0.8724935054779053, "learning_rate": 8.92857142857143e-06, "loss": 0.1072, "step": 1555 }, { "epoch": 1.077935573259439, "grad_norm": 0.8319444060325623, "learning_rate": 8.927877947295424e-06, "loss": 0.0948, "step": 1556 }, { "epoch": 1.078628333910634, "grad_norm": 0.7967873215675354, "learning_rate": 8.927184466019419e-06, "loss": 0.0887, "step": 1557 }, { "epoch": 1.0793210945618288, "grad_norm": 0.8298776745796204, "learning_rate": 8.926490984743412e-06, "loss": 0.0836, "step": 1558 }, { "epoch": 1.0800138552130238, "grad_norm": 0.890299916267395, "learning_rate": 8.925797503467407e-06, "loss": 0.1127, "step": 1559 }, { "epoch": 1.080706615864219, "grad_norm": 0.9299351572990417, "learning_rate": 8.925104022191402e-06, "loss": 0.1148, "step": 1560 }, { "epoch": 1.081399376515414, "grad_norm": 0.7534828186035156, "learning_rate": 8.924410540915395e-06, "loss": 0.0912, "step": 1561 }, { "epoch": 1.082092137166609, "grad_norm": 0.952809751033783, "learning_rate": 8.92371705963939e-06, "loss": 0.1056, "step": 1562 }, { "epoch": 1.0827848978178038, "grad_norm": 0.8834134936332703, "learning_rate": 8.923023578363385e-06, "loss": 0.1075, "step": 1563 }, { "epoch": 1.083477658468999, "grad_norm": 0.7658727765083313, "learning_rate": 8.922330097087379e-06, "loss": 0.0875, "step": 1564 }, { "epoch": 1.084170419120194, "grad_norm": 0.767088770866394, "learning_rate": 8.921636615811373e-06, "loss": 0.1049, "step": 1565 }, { "epoch": 1.084863179771389, "grad_norm": 0.7463728189468384, "learning_rate": 8.920943134535368e-06, "loss": 0.0856, "step": 1566 }, { "epoch": 1.085555940422584, "grad_norm": 0.9736886024475098, "learning_rate": 8.920249653259363e-06, "loss": 0.1124, "step": 1567 }, { "epoch": 1.086248701073779, "grad_norm": 0.847335696220398, "learning_rate": 8.919556171983357e-06, "loss": 0.0988, "step": 1568 }, { "epoch": 1.086941461724974, "grad_norm": 0.696940541267395, "learning_rate": 8.918862690707352e-06, "loss": 0.0836, "step": 1569 }, { "epoch": 1.087634222376169, "grad_norm": 0.7993536591529846, "learning_rate": 8.918169209431346e-06, "loss": 0.0952, "step": 1570 }, { "epoch": 1.088326983027364, "grad_norm": 0.8469255566596985, "learning_rate": 8.91747572815534e-06, "loss": 0.0812, "step": 1571 }, { "epoch": 1.0890197436785591, "grad_norm": 0.9231520295143127, "learning_rate": 8.916782246879335e-06, "loss": 0.0992, "step": 1572 }, { "epoch": 1.089712504329754, "grad_norm": 0.8420063853263855, "learning_rate": 8.91608876560333e-06, "loss": 0.0858, "step": 1573 }, { "epoch": 1.090405264980949, "grad_norm": 1.1117023229599, "learning_rate": 8.915395284327325e-06, "loss": 0.0928, "step": 1574 }, { "epoch": 1.091098025632144, "grad_norm": 0.7346429824829102, "learning_rate": 8.91470180305132e-06, "loss": 0.0895, "step": 1575 }, { "epoch": 1.0917907862833391, "grad_norm": 0.8749075531959534, "learning_rate": 8.914008321775313e-06, "loss": 0.1101, "step": 1576 }, { "epoch": 1.0924835469345342, "grad_norm": 0.840613067150116, "learning_rate": 8.913314840499308e-06, "loss": 0.1001, "step": 1577 }, { "epoch": 1.093176307585729, "grad_norm": 0.89544278383255, "learning_rate": 8.912621359223301e-06, "loss": 0.1043, "step": 1578 }, { "epoch": 1.093869068236924, "grad_norm": 0.9560450315475464, "learning_rate": 8.911927877947296e-06, "loss": 0.1288, "step": 1579 }, { "epoch": 1.0945618288881191, "grad_norm": 0.9753036499023438, "learning_rate": 8.91123439667129e-06, "loss": 0.1171, "step": 1580 }, { "epoch": 1.0952545895393142, "grad_norm": 0.7583956122398376, "learning_rate": 8.910540915395286e-06, "loss": 0.0955, "step": 1581 }, { "epoch": 1.0959473501905093, "grad_norm": 0.7719088196754456, "learning_rate": 8.90984743411928e-06, "loss": 0.1065, "step": 1582 }, { "epoch": 1.096640110841704, "grad_norm": 0.6964689493179321, "learning_rate": 8.909153952843274e-06, "loss": 0.0667, "step": 1583 }, { "epoch": 1.0973328714928992, "grad_norm": 0.8248798847198486, "learning_rate": 8.908460471567269e-06, "loss": 0.0833, "step": 1584 }, { "epoch": 1.0980256321440942, "grad_norm": 0.7117317914962769, "learning_rate": 8.907766990291264e-06, "loss": 0.0902, "step": 1585 }, { "epoch": 1.0987183927952893, "grad_norm": 0.8334735631942749, "learning_rate": 8.907073509015257e-06, "loss": 0.1074, "step": 1586 }, { "epoch": 1.0994111534464843, "grad_norm": 0.8360122442245483, "learning_rate": 8.906380027739252e-06, "loss": 0.1058, "step": 1587 }, { "epoch": 1.1001039140976792, "grad_norm": 0.8382623791694641, "learning_rate": 8.905686546463245e-06, "loss": 0.1137, "step": 1588 }, { "epoch": 1.1007966747488742, "grad_norm": 0.8029754161834717, "learning_rate": 8.90499306518724e-06, "loss": 0.1055, "step": 1589 }, { "epoch": 1.1014894354000693, "grad_norm": 0.778613805770874, "learning_rate": 8.904299583911235e-06, "loss": 0.0887, "step": 1590 }, { "epoch": 1.1021821960512643, "grad_norm": 0.8317983746528625, "learning_rate": 8.90360610263523e-06, "loss": 0.0856, "step": 1591 }, { "epoch": 1.1028749567024594, "grad_norm": 0.8260476589202881, "learning_rate": 8.902912621359225e-06, "loss": 0.1022, "step": 1592 }, { "epoch": 1.1035677173536542, "grad_norm": 0.8466697335243225, "learning_rate": 8.902219140083218e-06, "loss": 0.0942, "step": 1593 }, { "epoch": 1.1042604780048493, "grad_norm": 0.8946962952613831, "learning_rate": 8.901525658807213e-06, "loss": 0.1236, "step": 1594 }, { "epoch": 1.1049532386560443, "grad_norm": 0.8465958833694458, "learning_rate": 8.900832177531208e-06, "loss": 0.1087, "step": 1595 }, { "epoch": 1.1056459993072394, "grad_norm": 0.8597657680511475, "learning_rate": 8.900138696255201e-06, "loss": 0.0858, "step": 1596 }, { "epoch": 1.1063387599584344, "grad_norm": 0.9134311676025391, "learning_rate": 8.899445214979196e-06, "loss": 0.1294, "step": 1597 }, { "epoch": 1.1070315206096293, "grad_norm": 0.7299925684928894, "learning_rate": 8.898751733703191e-06, "loss": 0.0828, "step": 1598 }, { "epoch": 1.1077242812608243, "grad_norm": 0.8390952944755554, "learning_rate": 8.898058252427186e-06, "loss": 0.1073, "step": 1599 }, { "epoch": 1.1084170419120194, "grad_norm": 0.877514660358429, "learning_rate": 8.897364771151181e-06, "loss": 0.0876, "step": 1600 }, { "epoch": 1.1091098025632145, "grad_norm": 0.8509402871131897, "learning_rate": 8.896671289875174e-06, "loss": 0.087, "step": 1601 }, { "epoch": 1.1098025632144095, "grad_norm": 0.8124068975448608, "learning_rate": 8.895977808599169e-06, "loss": 0.0997, "step": 1602 }, { "epoch": 1.1104953238656043, "grad_norm": 0.8765443563461304, "learning_rate": 8.895284327323162e-06, "loss": 0.0911, "step": 1603 }, { "epoch": 1.1111880845167994, "grad_norm": 0.8992905020713806, "learning_rate": 8.894590846047157e-06, "loss": 0.112, "step": 1604 }, { "epoch": 1.1118808451679945, "grad_norm": 0.7098968625068665, "learning_rate": 8.893897364771152e-06, "loss": 0.0778, "step": 1605 }, { "epoch": 1.1125736058191895, "grad_norm": 0.8729726076126099, "learning_rate": 8.893203883495145e-06, "loss": 0.109, "step": 1606 }, { "epoch": 1.1132663664703846, "grad_norm": 0.8122535943984985, "learning_rate": 8.89251040221914e-06, "loss": 0.0805, "step": 1607 }, { "epoch": 1.1139591271215794, "grad_norm": 0.9429517388343811, "learning_rate": 8.891816920943135e-06, "loss": 0.0881, "step": 1608 }, { "epoch": 1.1146518877727745, "grad_norm": 0.8913496136665344, "learning_rate": 8.89112343966713e-06, "loss": 0.097, "step": 1609 }, { "epoch": 1.1153446484239695, "grad_norm": 0.9648658633232117, "learning_rate": 8.890429958391125e-06, "loss": 0.1072, "step": 1610 }, { "epoch": 1.1160374090751646, "grad_norm": 0.7665410041809082, "learning_rate": 8.889736477115118e-06, "loss": 0.0836, "step": 1611 }, { "epoch": 1.1167301697263596, "grad_norm": 0.7544222474098206, "learning_rate": 8.889042995839113e-06, "loss": 0.0799, "step": 1612 }, { "epoch": 1.1174229303775545, "grad_norm": 0.844616711139679, "learning_rate": 8.888349514563107e-06, "loss": 0.0927, "step": 1613 }, { "epoch": 1.1181156910287495, "grad_norm": 0.8389930725097656, "learning_rate": 8.887656033287102e-06, "loss": 0.1, "step": 1614 }, { "epoch": 1.1188084516799446, "grad_norm": 0.8759860396385193, "learning_rate": 8.886962552011096e-06, "loss": 0.095, "step": 1615 }, { "epoch": 1.1195012123311396, "grad_norm": 0.8822001218795776, "learning_rate": 8.886269070735091e-06, "loss": 0.0838, "step": 1616 }, { "epoch": 1.1201939729823347, "grad_norm": 0.7945178747177124, "learning_rate": 8.885575589459086e-06, "loss": 0.1027, "step": 1617 }, { "epoch": 1.1208867336335295, "grad_norm": 0.8539460897445679, "learning_rate": 8.88488210818308e-06, "loss": 0.0953, "step": 1618 }, { "epoch": 1.1215794942847246, "grad_norm": 0.8433850407600403, "learning_rate": 8.884188626907074e-06, "loss": 0.0942, "step": 1619 }, { "epoch": 1.1222722549359196, "grad_norm": 0.964124321937561, "learning_rate": 8.88349514563107e-06, "loss": 0.1065, "step": 1620 }, { "epoch": 1.1229650155871147, "grad_norm": 0.9237002730369568, "learning_rate": 8.882801664355063e-06, "loss": 0.1102, "step": 1621 }, { "epoch": 1.1236577762383098, "grad_norm": 0.7764036655426025, "learning_rate": 8.882108183079058e-06, "loss": 0.0906, "step": 1622 }, { "epoch": 1.1243505368895046, "grad_norm": 0.8339813351631165, "learning_rate": 8.88141470180305e-06, "loss": 0.0985, "step": 1623 }, { "epoch": 1.1250432975406996, "grad_norm": 0.9000864028930664, "learning_rate": 8.880721220527046e-06, "loss": 0.1039, "step": 1624 }, { "epoch": 1.1257360581918947, "grad_norm": 0.728771448135376, "learning_rate": 8.88002773925104e-06, "loss": 0.0855, "step": 1625 }, { "epoch": 1.1264288188430898, "grad_norm": 0.8056138753890991, "learning_rate": 8.879334257975036e-06, "loss": 0.0926, "step": 1626 }, { "epoch": 1.1271215794942848, "grad_norm": 0.8006919026374817, "learning_rate": 8.87864077669903e-06, "loss": 0.1135, "step": 1627 }, { "epoch": 1.1278143401454797, "grad_norm": 0.7326475977897644, "learning_rate": 8.877947295423024e-06, "loss": 0.0942, "step": 1628 }, { "epoch": 1.1285071007966747, "grad_norm": 0.7217555046081543, "learning_rate": 8.877253814147019e-06, "loss": 0.0806, "step": 1629 }, { "epoch": 1.1291998614478698, "grad_norm": 0.8380305171012878, "learning_rate": 8.876560332871014e-06, "loss": 0.1011, "step": 1630 }, { "epoch": 1.1298926220990648, "grad_norm": 0.7469961643218994, "learning_rate": 8.875866851595007e-06, "loss": 0.0755, "step": 1631 }, { "epoch": 1.1305853827502599, "grad_norm": 0.8756946921348572, "learning_rate": 8.875173370319002e-06, "loss": 0.1159, "step": 1632 }, { "epoch": 1.1312781434014547, "grad_norm": 0.7768692970275879, "learning_rate": 8.874479889042997e-06, "loss": 0.0601, "step": 1633 }, { "epoch": 1.1319709040526498, "grad_norm": 0.7950308322906494, "learning_rate": 8.873786407766992e-06, "loss": 0.093, "step": 1634 }, { "epoch": 1.1326636647038448, "grad_norm": 0.7664488554000854, "learning_rate": 8.873092926490987e-06, "loss": 0.0738, "step": 1635 }, { "epoch": 1.1333564253550399, "grad_norm": 0.8197282552719116, "learning_rate": 8.87239944521498e-06, "loss": 0.1013, "step": 1636 }, { "epoch": 1.134049186006235, "grad_norm": 0.9681476950645447, "learning_rate": 8.871705963938975e-06, "loss": 0.107, "step": 1637 }, { "epoch": 1.1347419466574298, "grad_norm": 0.7873309850692749, "learning_rate": 8.871012482662968e-06, "loss": 0.0967, "step": 1638 }, { "epoch": 1.1354347073086248, "grad_norm": 1.0039669275283813, "learning_rate": 8.870319001386963e-06, "loss": 0.1273, "step": 1639 }, { "epoch": 1.13612746795982, "grad_norm": 0.7824607491493225, "learning_rate": 8.869625520110958e-06, "loss": 0.088, "step": 1640 }, { "epoch": 1.136820228611015, "grad_norm": 0.7642351388931274, "learning_rate": 8.868932038834953e-06, "loss": 0.0859, "step": 1641 }, { "epoch": 1.13751298926221, "grad_norm": 0.8798947334289551, "learning_rate": 8.868238557558946e-06, "loss": 0.0877, "step": 1642 }, { "epoch": 1.1382057499134048, "grad_norm": 0.906349778175354, "learning_rate": 8.867545076282941e-06, "loss": 0.1013, "step": 1643 }, { "epoch": 1.1388985105646, "grad_norm": 0.689426600933075, "learning_rate": 8.866851595006936e-06, "loss": 0.0748, "step": 1644 }, { "epoch": 1.139591271215795, "grad_norm": 0.7668915390968323, "learning_rate": 8.866158113730931e-06, "loss": 0.0757, "step": 1645 }, { "epoch": 1.14028403186699, "grad_norm": 0.9249112606048584, "learning_rate": 8.865464632454924e-06, "loss": 0.0987, "step": 1646 }, { "epoch": 1.140976792518185, "grad_norm": 0.7976338267326355, "learning_rate": 8.864771151178919e-06, "loss": 0.0937, "step": 1647 }, { "epoch": 1.14166955316938, "grad_norm": 1.0105061531066895, "learning_rate": 8.864077669902912e-06, "loss": 0.1253, "step": 1648 }, { "epoch": 1.142362313820575, "grad_norm": 0.7926379442214966, "learning_rate": 8.863384188626907e-06, "loss": 0.0799, "step": 1649 }, { "epoch": 1.14305507447177, "grad_norm": 0.7894308567047119, "learning_rate": 8.862690707350902e-06, "loss": 0.0848, "step": 1650 }, { "epoch": 1.143747835122965, "grad_norm": 0.8707621097564697, "learning_rate": 8.861997226074897e-06, "loss": 0.0878, "step": 1651 }, { "epoch": 1.1444405957741601, "grad_norm": 0.8926756978034973, "learning_rate": 8.861303744798892e-06, "loss": 0.0936, "step": 1652 }, { "epoch": 1.145133356425355, "grad_norm": 0.957144021987915, "learning_rate": 8.860610263522885e-06, "loss": 0.0933, "step": 1653 }, { "epoch": 1.14582611707655, "grad_norm": 0.9077473878860474, "learning_rate": 8.85991678224688e-06, "loss": 0.1079, "step": 1654 }, { "epoch": 1.146518877727745, "grad_norm": 0.8787539601325989, "learning_rate": 8.859223300970875e-06, "loss": 0.1079, "step": 1655 }, { "epoch": 1.1472116383789401, "grad_norm": 0.8024150133132935, "learning_rate": 8.858529819694868e-06, "loss": 0.0865, "step": 1656 }, { "epoch": 1.1479043990301352, "grad_norm": 0.8101352453231812, "learning_rate": 8.857836338418863e-06, "loss": 0.0855, "step": 1657 }, { "epoch": 1.14859715968133, "grad_norm": 0.7889779806137085, "learning_rate": 8.857142857142858e-06, "loss": 0.0801, "step": 1658 }, { "epoch": 1.149289920332525, "grad_norm": 0.8348006010055542, "learning_rate": 8.856449375866853e-06, "loss": 0.0987, "step": 1659 }, { "epoch": 1.1499826809837201, "grad_norm": 0.8121004700660706, "learning_rate": 8.855755894590848e-06, "loss": 0.0918, "step": 1660 }, { "epoch": 1.1506754416349152, "grad_norm": 0.8209229111671448, "learning_rate": 8.855062413314841e-06, "loss": 0.0872, "step": 1661 }, { "epoch": 1.1513682022861103, "grad_norm": 0.8757693767547607, "learning_rate": 8.854368932038836e-06, "loss": 0.1056, "step": 1662 }, { "epoch": 1.152060962937305, "grad_norm": 0.7186292409896851, "learning_rate": 8.85367545076283e-06, "loss": 0.081, "step": 1663 }, { "epoch": 1.1527537235885001, "grad_norm": 0.8405160307884216, "learning_rate": 8.852981969486824e-06, "loss": 0.1121, "step": 1664 }, { "epoch": 1.1534464842396952, "grad_norm": 0.8855977058410645, "learning_rate": 8.85228848821082e-06, "loss": 0.0884, "step": 1665 }, { "epoch": 1.1541392448908903, "grad_norm": 0.9416713118553162, "learning_rate": 8.851595006934813e-06, "loss": 0.0909, "step": 1666 }, { "epoch": 1.1548320055420853, "grad_norm": 0.889167308807373, "learning_rate": 8.850901525658808e-06, "loss": 0.1148, "step": 1667 }, { "epoch": 1.1555247661932802, "grad_norm": 0.8325088024139404, "learning_rate": 8.850208044382803e-06, "loss": 0.1016, "step": 1668 }, { "epoch": 1.1562175268444752, "grad_norm": 0.8529247641563416, "learning_rate": 8.849514563106797e-06, "loss": 0.0905, "step": 1669 }, { "epoch": 1.1569102874956703, "grad_norm": 0.8866456747055054, "learning_rate": 8.848821081830792e-06, "loss": 0.1206, "step": 1670 }, { "epoch": 1.1576030481468653, "grad_norm": 0.8046126365661621, "learning_rate": 8.848127600554786e-06, "loss": 0.1144, "step": 1671 }, { "epoch": 1.1582958087980604, "grad_norm": 0.7605208158493042, "learning_rate": 8.84743411927878e-06, "loss": 0.0903, "step": 1672 }, { "epoch": 1.1589885694492552, "grad_norm": 0.829639732837677, "learning_rate": 8.846740638002774e-06, "loss": 0.0975, "step": 1673 }, { "epoch": 1.1596813301004503, "grad_norm": 0.8354451656341553, "learning_rate": 8.846047156726769e-06, "loss": 0.0984, "step": 1674 }, { "epoch": 1.1603740907516453, "grad_norm": 0.8832699656486511, "learning_rate": 8.845353675450764e-06, "loss": 0.1112, "step": 1675 }, { "epoch": 1.1610668514028404, "grad_norm": 0.831802248954773, "learning_rate": 8.844660194174759e-06, "loss": 0.1032, "step": 1676 }, { "epoch": 1.1617596120540354, "grad_norm": 0.96922367811203, "learning_rate": 8.843966712898754e-06, "loss": 0.0947, "step": 1677 }, { "epoch": 1.1624523727052303, "grad_norm": 0.8046149015426636, "learning_rate": 8.843273231622747e-06, "loss": 0.092, "step": 1678 }, { "epoch": 1.1631451333564253, "grad_norm": 0.8756119012832642, "learning_rate": 8.842579750346742e-06, "loss": 0.0863, "step": 1679 }, { "epoch": 1.1638378940076204, "grad_norm": 0.7364367246627808, "learning_rate": 8.841886269070737e-06, "loss": 0.0884, "step": 1680 }, { "epoch": 1.1645306546588154, "grad_norm": 0.7794440984725952, "learning_rate": 8.84119278779473e-06, "loss": 0.0883, "step": 1681 }, { "epoch": 1.1652234153100105, "grad_norm": 0.8520834445953369, "learning_rate": 8.840499306518725e-06, "loss": 0.0972, "step": 1682 }, { "epoch": 1.1659161759612053, "grad_norm": 0.8838192820549011, "learning_rate": 8.839805825242718e-06, "loss": 0.1158, "step": 1683 }, { "epoch": 1.1666089366124004, "grad_norm": 0.6713674068450928, "learning_rate": 8.839112343966713e-06, "loss": 0.0729, "step": 1684 }, { "epoch": 1.1673016972635955, "grad_norm": 0.8523823618888855, "learning_rate": 8.838418862690708e-06, "loss": 0.0849, "step": 1685 }, { "epoch": 1.1679944579147905, "grad_norm": 0.8470174670219421, "learning_rate": 8.837725381414703e-06, "loss": 0.096, "step": 1686 }, { "epoch": 1.1686872185659856, "grad_norm": 0.902706503868103, "learning_rate": 8.837031900138698e-06, "loss": 0.1046, "step": 1687 }, { "epoch": 1.1693799792171804, "grad_norm": 0.7811160087585449, "learning_rate": 8.836338418862691e-06, "loss": 0.0935, "step": 1688 }, { "epoch": 1.1700727398683755, "grad_norm": 0.8322646617889404, "learning_rate": 8.835644937586686e-06, "loss": 0.1052, "step": 1689 }, { "epoch": 1.1707655005195705, "grad_norm": 0.8243199586868286, "learning_rate": 8.834951456310681e-06, "loss": 0.1135, "step": 1690 }, { "epoch": 1.1714582611707656, "grad_norm": 0.8629660606384277, "learning_rate": 8.834257975034674e-06, "loss": 0.0952, "step": 1691 }, { "epoch": 1.1721510218219606, "grad_norm": 1.0917249917984009, "learning_rate": 8.833564493758669e-06, "loss": 0.1336, "step": 1692 }, { "epoch": 1.1728437824731555, "grad_norm": 0.9731141328811646, "learning_rate": 8.832871012482664e-06, "loss": 0.1084, "step": 1693 }, { "epoch": 1.1735365431243505, "grad_norm": 0.8693202137947083, "learning_rate": 8.832177531206659e-06, "loss": 0.1009, "step": 1694 }, { "epoch": 1.1742293037755456, "grad_norm": 0.8254092335700989, "learning_rate": 8.831484049930654e-06, "loss": 0.1141, "step": 1695 }, { "epoch": 1.1749220644267406, "grad_norm": 0.8278314471244812, "learning_rate": 8.830790568654647e-06, "loss": 0.1116, "step": 1696 }, { "epoch": 1.1756148250779357, "grad_norm": 0.8282114267349243, "learning_rate": 8.830097087378642e-06, "loss": 0.0848, "step": 1697 }, { "epoch": 1.1763075857291305, "grad_norm": 0.8601030707359314, "learning_rate": 8.829403606102635e-06, "loss": 0.0974, "step": 1698 }, { "epoch": 1.1770003463803256, "grad_norm": 1.0528501272201538, "learning_rate": 8.82871012482663e-06, "loss": 0.0954, "step": 1699 }, { "epoch": 1.1776931070315206, "grad_norm": 0.8671848773956299, "learning_rate": 8.828016643550625e-06, "loss": 0.0896, "step": 1700 }, { "epoch": 1.1783858676827157, "grad_norm": 1.3647528886795044, "learning_rate": 8.827323162274618e-06, "loss": 0.1201, "step": 1701 }, { "epoch": 1.1790786283339107, "grad_norm": 0.7462443113327026, "learning_rate": 8.826629680998613e-06, "loss": 0.0791, "step": 1702 }, { "epoch": 1.1797713889851056, "grad_norm": 0.8084028959274292, "learning_rate": 8.825936199722608e-06, "loss": 0.0817, "step": 1703 }, { "epoch": 1.1804641496363006, "grad_norm": 0.763846218585968, "learning_rate": 8.825242718446603e-06, "loss": 0.0867, "step": 1704 }, { "epoch": 1.1811569102874957, "grad_norm": 0.826973557472229, "learning_rate": 8.824549237170598e-06, "loss": 0.0959, "step": 1705 }, { "epoch": 1.1818496709386908, "grad_norm": 0.8394191861152649, "learning_rate": 8.823855755894591e-06, "loss": 0.1102, "step": 1706 }, { "epoch": 1.1825424315898858, "grad_norm": 0.9438965916633606, "learning_rate": 8.823162274618586e-06, "loss": 0.1097, "step": 1707 }, { "epoch": 1.1832351922410806, "grad_norm": 0.8429800868034363, "learning_rate": 8.82246879334258e-06, "loss": 0.115, "step": 1708 }, { "epoch": 1.1839279528922757, "grad_norm": 0.7064415216445923, "learning_rate": 8.821775312066574e-06, "loss": 0.0801, "step": 1709 }, { "epoch": 1.1846207135434708, "grad_norm": 0.739035964012146, "learning_rate": 8.82108183079057e-06, "loss": 0.0824, "step": 1710 }, { "epoch": 1.1853134741946658, "grad_norm": 0.7586328387260437, "learning_rate": 8.820388349514564e-06, "loss": 0.0789, "step": 1711 }, { "epoch": 1.1860062348458609, "grad_norm": 0.8423470854759216, "learning_rate": 8.81969486823856e-06, "loss": 0.1163, "step": 1712 }, { "epoch": 1.1866989954970557, "grad_norm": 0.8857660293579102, "learning_rate": 8.819001386962552e-06, "loss": 0.1072, "step": 1713 }, { "epoch": 1.1873917561482508, "grad_norm": 0.9075539112091064, "learning_rate": 8.818307905686547e-06, "loss": 0.0941, "step": 1714 }, { "epoch": 1.1880845167994458, "grad_norm": 0.831018328666687, "learning_rate": 8.817614424410542e-06, "loss": 0.0802, "step": 1715 }, { "epoch": 1.1887772774506409, "grad_norm": 0.774970531463623, "learning_rate": 8.816920943134536e-06, "loss": 0.0829, "step": 1716 }, { "epoch": 1.189470038101836, "grad_norm": 0.8437609672546387, "learning_rate": 8.81622746185853e-06, "loss": 0.0952, "step": 1717 }, { "epoch": 1.1901627987530308, "grad_norm": 0.9007271528244019, "learning_rate": 8.815533980582525e-06, "loss": 0.0861, "step": 1718 }, { "epoch": 1.1908555594042258, "grad_norm": 0.9031360745429993, "learning_rate": 8.814840499306519e-06, "loss": 0.1107, "step": 1719 }, { "epoch": 1.1915483200554209, "grad_norm": 0.8673595190048218, "learning_rate": 8.814147018030514e-06, "loss": 0.1065, "step": 1720 }, { "epoch": 1.192241080706616, "grad_norm": 0.7581225037574768, "learning_rate": 8.813453536754509e-06, "loss": 0.0901, "step": 1721 }, { "epoch": 1.192933841357811, "grad_norm": 0.7888699173927307, "learning_rate": 8.812760055478503e-06, "loss": 0.0818, "step": 1722 }, { "epoch": 1.1936266020090058, "grad_norm": 0.8125446438789368, "learning_rate": 8.812066574202497e-06, "loss": 0.0909, "step": 1723 }, { "epoch": 1.194319362660201, "grad_norm": 0.8669037818908691, "learning_rate": 8.811373092926492e-06, "loss": 0.0983, "step": 1724 }, { "epoch": 1.195012123311396, "grad_norm": 0.753842294216156, "learning_rate": 8.810679611650487e-06, "loss": 0.0855, "step": 1725 }, { "epoch": 1.195704883962591, "grad_norm": 0.8449932336807251, "learning_rate": 8.80998613037448e-06, "loss": 0.0948, "step": 1726 }, { "epoch": 1.196397644613786, "grad_norm": 0.7542533278465271, "learning_rate": 8.809292649098475e-06, "loss": 0.083, "step": 1727 }, { "epoch": 1.197090405264981, "grad_norm": 0.9120563864707947, "learning_rate": 8.80859916782247e-06, "loss": 0.1076, "step": 1728 }, { "epoch": 1.197783165916176, "grad_norm": 0.9268695712089539, "learning_rate": 8.807905686546465e-06, "loss": 0.0992, "step": 1729 }, { "epoch": 1.198475926567371, "grad_norm": 0.8432742953300476, "learning_rate": 8.80721220527046e-06, "loss": 0.0937, "step": 1730 }, { "epoch": 1.199168687218566, "grad_norm": 0.8582000732421875, "learning_rate": 8.806518723994453e-06, "loss": 0.1055, "step": 1731 }, { "epoch": 1.1998614478697611, "grad_norm": 0.8746100664138794, "learning_rate": 8.805825242718448e-06, "loss": 0.0996, "step": 1732 }, { "epoch": 1.200554208520956, "grad_norm": 0.7897584438323975, "learning_rate": 8.805131761442441e-06, "loss": 0.0952, "step": 1733 }, { "epoch": 1.201246969172151, "grad_norm": 0.7623755931854248, "learning_rate": 8.804438280166436e-06, "loss": 0.0806, "step": 1734 }, { "epoch": 1.201939729823346, "grad_norm": 0.8046366572380066, "learning_rate": 8.803744798890431e-06, "loss": 0.0665, "step": 1735 }, { "epoch": 1.2026324904745411, "grad_norm": 0.8854013681411743, "learning_rate": 8.803051317614426e-06, "loss": 0.0926, "step": 1736 }, { "epoch": 1.203325251125736, "grad_norm": 0.8266016244888306, "learning_rate": 8.80235783633842e-06, "loss": 0.086, "step": 1737 }, { "epoch": 1.204018011776931, "grad_norm": 0.8264349699020386, "learning_rate": 8.801664355062414e-06, "loss": 0.0977, "step": 1738 }, { "epoch": 1.204710772428126, "grad_norm": 0.7382345795631409, "learning_rate": 8.800970873786409e-06, "loss": 0.0804, "step": 1739 }, { "epoch": 1.2054035330793211, "grad_norm": 0.7804838418960571, "learning_rate": 8.800277392510404e-06, "loss": 0.0832, "step": 1740 }, { "epoch": 1.2060962937305162, "grad_norm": 0.7389581203460693, "learning_rate": 8.799583911234397e-06, "loss": 0.0901, "step": 1741 }, { "epoch": 1.206789054381711, "grad_norm": 0.8908270001411438, "learning_rate": 8.798890429958392e-06, "loss": 0.0893, "step": 1742 }, { "epoch": 1.207481815032906, "grad_norm": 0.9740622639656067, "learning_rate": 8.798196948682385e-06, "loss": 0.1231, "step": 1743 }, { "epoch": 1.2081745756841011, "grad_norm": 0.813880205154419, "learning_rate": 8.79750346740638e-06, "loss": 0.1011, "step": 1744 }, { "epoch": 1.2088673363352962, "grad_norm": 0.9801640510559082, "learning_rate": 8.796809986130375e-06, "loss": 0.0832, "step": 1745 }, { "epoch": 1.2095600969864913, "grad_norm": 0.8074042201042175, "learning_rate": 8.79611650485437e-06, "loss": 0.0888, "step": 1746 }, { "epoch": 1.210252857637686, "grad_norm": 0.7691810727119446, "learning_rate": 8.795423023578365e-06, "loss": 0.0989, "step": 1747 }, { "epoch": 1.2109456182888811, "grad_norm": 0.8661003112792969, "learning_rate": 8.794729542302358e-06, "loss": 0.1081, "step": 1748 }, { "epoch": 1.2116383789400762, "grad_norm": 0.9518867135047913, "learning_rate": 8.794036061026353e-06, "loss": 0.1168, "step": 1749 }, { "epoch": 1.2123311395912713, "grad_norm": 0.955470085144043, "learning_rate": 8.793342579750348e-06, "loss": 0.1213, "step": 1750 }, { "epoch": 1.2130239002424663, "grad_norm": 0.7917088866233826, "learning_rate": 8.792649098474341e-06, "loss": 0.0994, "step": 1751 }, { "epoch": 1.2137166608936611, "grad_norm": 0.8858113884925842, "learning_rate": 8.791955617198336e-06, "loss": 0.1199, "step": 1752 }, { "epoch": 1.2144094215448562, "grad_norm": 0.8482376337051392, "learning_rate": 8.791262135922331e-06, "loss": 0.0927, "step": 1753 }, { "epoch": 1.2151021821960513, "grad_norm": 0.8622453212738037, "learning_rate": 8.790568654646326e-06, "loss": 0.0942, "step": 1754 }, { "epoch": 1.2157949428472463, "grad_norm": 0.9274044632911682, "learning_rate": 8.789875173370321e-06, "loss": 0.0932, "step": 1755 }, { "epoch": 1.2164877034984414, "grad_norm": 0.802811861038208, "learning_rate": 8.789181692094314e-06, "loss": 0.0765, "step": 1756 }, { "epoch": 1.2171804641496362, "grad_norm": 0.7712103724479675, "learning_rate": 8.78848821081831e-06, "loss": 0.0786, "step": 1757 }, { "epoch": 1.2178732248008313, "grad_norm": 1.124839186668396, "learning_rate": 8.787794729542302e-06, "loss": 0.1003, "step": 1758 }, { "epoch": 1.2185659854520263, "grad_norm": 0.7888901233673096, "learning_rate": 8.787101248266297e-06, "loss": 0.0813, "step": 1759 }, { "epoch": 1.2192587461032214, "grad_norm": 0.9014065265655518, "learning_rate": 8.786407766990292e-06, "loss": 0.1011, "step": 1760 }, { "epoch": 1.2199515067544164, "grad_norm": 0.859272837638855, "learning_rate": 8.785714285714286e-06, "loss": 0.1052, "step": 1761 }, { "epoch": 1.2206442674056113, "grad_norm": 0.7922794222831726, "learning_rate": 8.78502080443828e-06, "loss": 0.0974, "step": 1762 }, { "epoch": 1.2213370280568063, "grad_norm": 0.8734545111656189, "learning_rate": 8.784327323162275e-06, "loss": 0.1074, "step": 1763 }, { "epoch": 1.2220297887080014, "grad_norm": 0.8003416657447815, "learning_rate": 8.78363384188627e-06, "loss": 0.0943, "step": 1764 }, { "epoch": 1.2227225493591964, "grad_norm": 0.7421976327896118, "learning_rate": 8.782940360610265e-06, "loss": 0.1014, "step": 1765 }, { "epoch": 1.2234153100103915, "grad_norm": 0.8193487524986267, "learning_rate": 8.782246879334259e-06, "loss": 0.0969, "step": 1766 }, { "epoch": 1.2241080706615863, "grad_norm": 0.9062775373458862, "learning_rate": 8.781553398058253e-06, "loss": 0.1, "step": 1767 }, { "epoch": 1.2248008313127814, "grad_norm": 0.913105309009552, "learning_rate": 8.780859916782247e-06, "loss": 0.1151, "step": 1768 }, { "epoch": 1.2254935919639764, "grad_norm": 0.7597215175628662, "learning_rate": 8.780166435506242e-06, "loss": 0.0942, "step": 1769 }, { "epoch": 1.2261863526151715, "grad_norm": 0.7712662220001221, "learning_rate": 8.779472954230237e-06, "loss": 0.0864, "step": 1770 }, { "epoch": 1.2268791132663666, "grad_norm": 0.7437800765037537, "learning_rate": 8.778779472954232e-06, "loss": 0.0853, "step": 1771 }, { "epoch": 1.2275718739175614, "grad_norm": 0.8142057061195374, "learning_rate": 8.778085991678226e-06, "loss": 0.1051, "step": 1772 }, { "epoch": 1.2282646345687565, "grad_norm": 0.9158908128738403, "learning_rate": 8.77739251040222e-06, "loss": 0.1038, "step": 1773 }, { "epoch": 1.2289573952199515, "grad_norm": 0.9019196629524231, "learning_rate": 8.776699029126215e-06, "loss": 0.0864, "step": 1774 }, { "epoch": 1.2296501558711466, "grad_norm": 0.8636556267738342, "learning_rate": 8.77600554785021e-06, "loss": 0.1178, "step": 1775 }, { "epoch": 1.2303429165223416, "grad_norm": 0.8372964262962341, "learning_rate": 8.775312066574203e-06, "loss": 0.0662, "step": 1776 }, { "epoch": 1.2310356771735365, "grad_norm": 0.8746647834777832, "learning_rate": 8.774618585298198e-06, "loss": 0.0968, "step": 1777 }, { "epoch": 1.2317284378247315, "grad_norm": 0.9964105486869812, "learning_rate": 8.773925104022191e-06, "loss": 0.1123, "step": 1778 }, { "epoch": 1.2324211984759266, "grad_norm": 0.756689727306366, "learning_rate": 8.773231622746186e-06, "loss": 0.0721, "step": 1779 }, { "epoch": 1.2331139591271216, "grad_norm": 0.8758362531661987, "learning_rate": 8.77253814147018e-06, "loss": 0.1014, "step": 1780 }, { "epoch": 1.2338067197783167, "grad_norm": 0.7719059586524963, "learning_rate": 8.771844660194176e-06, "loss": 0.0949, "step": 1781 }, { "epoch": 1.2344994804295115, "grad_norm": 0.8182286620140076, "learning_rate": 8.77115117891817e-06, "loss": 0.0966, "step": 1782 }, { "epoch": 1.2351922410807066, "grad_norm": 0.8806812167167664, "learning_rate": 8.770457697642164e-06, "loss": 0.1119, "step": 1783 }, { "epoch": 1.2358850017319016, "grad_norm": 0.9253789782524109, "learning_rate": 8.769764216366159e-06, "loss": 0.1036, "step": 1784 }, { "epoch": 1.2365777623830967, "grad_norm": 0.8416339755058289, "learning_rate": 8.769070735090154e-06, "loss": 0.0917, "step": 1785 }, { "epoch": 1.2372705230342917, "grad_norm": 0.8205117583274841, "learning_rate": 8.768377253814147e-06, "loss": 0.098, "step": 1786 }, { "epoch": 1.2379632836854866, "grad_norm": 0.9483963847160339, "learning_rate": 8.767683772538142e-06, "loss": 0.0984, "step": 1787 }, { "epoch": 1.2386560443366816, "grad_norm": 0.9278393387794495, "learning_rate": 8.766990291262137e-06, "loss": 0.1076, "step": 1788 }, { "epoch": 1.2393488049878767, "grad_norm": 0.8407341837882996, "learning_rate": 8.766296809986132e-06, "loss": 0.0859, "step": 1789 }, { "epoch": 1.2400415656390718, "grad_norm": 0.9124855995178223, "learning_rate": 8.765603328710127e-06, "loss": 0.12, "step": 1790 }, { "epoch": 1.2407343262902668, "grad_norm": 0.8021404147148132, "learning_rate": 8.76490984743412e-06, "loss": 0.072, "step": 1791 }, { "epoch": 1.2414270869414616, "grad_norm": 0.8200904130935669, "learning_rate": 8.764216366158115e-06, "loss": 0.0762, "step": 1792 }, { "epoch": 1.2421198475926567, "grad_norm": 0.7664557695388794, "learning_rate": 8.763522884882108e-06, "loss": 0.0912, "step": 1793 }, { "epoch": 1.2428126082438518, "grad_norm": 0.9423385858535767, "learning_rate": 8.762829403606103e-06, "loss": 0.1182, "step": 1794 }, { "epoch": 1.2435053688950468, "grad_norm": 0.8365580439567566, "learning_rate": 8.762135922330098e-06, "loss": 0.0936, "step": 1795 }, { "epoch": 1.2441981295462416, "grad_norm": 0.93754643201828, "learning_rate": 8.761442441054091e-06, "loss": 0.124, "step": 1796 }, { "epoch": 1.2448908901974367, "grad_norm": 0.7680178284645081, "learning_rate": 8.760748959778086e-06, "loss": 0.1028, "step": 1797 }, { "epoch": 1.2455836508486318, "grad_norm": 0.8152678608894348, "learning_rate": 8.760055478502081e-06, "loss": 0.0865, "step": 1798 }, { "epoch": 1.2462764114998268, "grad_norm": 0.7605410218238831, "learning_rate": 8.759361997226076e-06, "loss": 0.0768, "step": 1799 }, { "epoch": 1.2469691721510219, "grad_norm": 0.8376767039299011, "learning_rate": 8.758668515950071e-06, "loss": 0.0966, "step": 1800 }, { "epoch": 1.2476619328022167, "grad_norm": 0.886950671672821, "learning_rate": 8.757975034674064e-06, "loss": 0.1082, "step": 1801 }, { "epoch": 1.2483546934534118, "grad_norm": 0.8822245597839355, "learning_rate": 8.75728155339806e-06, "loss": 0.1032, "step": 1802 }, { "epoch": 1.2490474541046068, "grad_norm": 0.8090283274650574, "learning_rate": 8.756588072122052e-06, "loss": 0.0874, "step": 1803 }, { "epoch": 1.2497402147558019, "grad_norm": 0.8402153253555298, "learning_rate": 8.755894590846047e-06, "loss": 0.1002, "step": 1804 }, { "epoch": 1.250432975406997, "grad_norm": 0.7970601916313171, "learning_rate": 8.755201109570042e-06, "loss": 0.0831, "step": 1805 }, { "epoch": 1.2511257360581918, "grad_norm": 0.7803841233253479, "learning_rate": 8.754507628294037e-06, "loss": 0.0835, "step": 1806 }, { "epoch": 1.2518184967093868, "grad_norm": 0.7126593589782715, "learning_rate": 8.753814147018032e-06, "loss": 0.0867, "step": 1807 }, { "epoch": 1.2525112573605819, "grad_norm": 0.8743826746940613, "learning_rate": 8.753120665742025e-06, "loss": 0.1001, "step": 1808 }, { "epoch": 1.253204018011777, "grad_norm": 0.9614912867546082, "learning_rate": 8.75242718446602e-06, "loss": 0.1229, "step": 1809 }, { "epoch": 1.253896778662972, "grad_norm": 0.8666008710861206, "learning_rate": 8.751733703190015e-06, "loss": 0.0781, "step": 1810 }, { "epoch": 1.2545895393141668, "grad_norm": 0.9381266236305237, "learning_rate": 8.751040221914009e-06, "loss": 0.1314, "step": 1811 }, { "epoch": 1.255282299965362, "grad_norm": 0.9971676468849182, "learning_rate": 8.750346740638003e-06, "loss": 0.1007, "step": 1812 }, { "epoch": 1.255975060616557, "grad_norm": 0.7789368033409119, "learning_rate": 8.749653259361998e-06, "loss": 0.082, "step": 1813 }, { "epoch": 1.256667821267752, "grad_norm": 0.8247684836387634, "learning_rate": 8.748959778085993e-06, "loss": 0.1143, "step": 1814 }, { "epoch": 1.257360581918947, "grad_norm": 0.7430122494697571, "learning_rate": 8.748266296809987e-06, "loss": 0.0772, "step": 1815 }, { "epoch": 1.258053342570142, "grad_norm": 0.8330528140068054, "learning_rate": 8.747572815533982e-06, "loss": 0.1069, "step": 1816 }, { "epoch": 1.258746103221337, "grad_norm": 0.8116832971572876, "learning_rate": 8.746879334257976e-06, "loss": 0.0745, "step": 1817 }, { "epoch": 1.259438863872532, "grad_norm": 0.7854447364807129, "learning_rate": 8.74618585298197e-06, "loss": 0.0876, "step": 1818 }, { "epoch": 1.260131624523727, "grad_norm": 0.8094685673713684, "learning_rate": 8.745492371705965e-06, "loss": 0.0961, "step": 1819 }, { "epoch": 1.2608243851749221, "grad_norm": 0.8866847157478333, "learning_rate": 8.744798890429958e-06, "loss": 0.1133, "step": 1820 }, { "epoch": 1.261517145826117, "grad_norm": 0.7459259033203125, "learning_rate": 8.744105409153953e-06, "loss": 0.0977, "step": 1821 }, { "epoch": 1.262209906477312, "grad_norm": 0.7870854139328003, "learning_rate": 8.743411927877948e-06, "loss": 0.0828, "step": 1822 }, { "epoch": 1.262902667128507, "grad_norm": 0.9214602112770081, "learning_rate": 8.742718446601943e-06, "loss": 0.092, "step": 1823 }, { "epoch": 1.2635954277797021, "grad_norm": 0.7299473285675049, "learning_rate": 8.742024965325938e-06, "loss": 0.0865, "step": 1824 }, { "epoch": 1.2642881884308972, "grad_norm": 0.8461336493492126, "learning_rate": 8.74133148404993e-06, "loss": 0.1011, "step": 1825 }, { "epoch": 1.264980949082092, "grad_norm": 0.7238945960998535, "learning_rate": 8.740638002773926e-06, "loss": 0.0805, "step": 1826 }, { "epoch": 1.265673709733287, "grad_norm": 0.8741490244865417, "learning_rate": 8.73994452149792e-06, "loss": 0.1077, "step": 1827 }, { "epoch": 1.2663664703844821, "grad_norm": 0.7736914157867432, "learning_rate": 8.739251040221914e-06, "loss": 0.0881, "step": 1828 }, { "epoch": 1.2670592310356772, "grad_norm": 0.7423211336135864, "learning_rate": 8.738557558945909e-06, "loss": 0.0844, "step": 1829 }, { "epoch": 1.2677519916868722, "grad_norm": 0.7699253559112549, "learning_rate": 8.737864077669904e-06, "loss": 0.0832, "step": 1830 }, { "epoch": 1.268444752338067, "grad_norm": 0.853696346282959, "learning_rate": 8.737170596393899e-06, "loss": 0.1042, "step": 1831 }, { "epoch": 1.2691375129892621, "grad_norm": 0.7845718860626221, "learning_rate": 8.736477115117894e-06, "loss": 0.0804, "step": 1832 }, { "epoch": 1.2698302736404572, "grad_norm": 0.763395369052887, "learning_rate": 8.735783633841887e-06, "loss": 0.0914, "step": 1833 }, { "epoch": 1.2705230342916523, "grad_norm": 0.8438493013381958, "learning_rate": 8.735090152565882e-06, "loss": 0.0895, "step": 1834 }, { "epoch": 1.2712157949428473, "grad_norm": 0.8871287107467651, "learning_rate": 8.734396671289875e-06, "loss": 0.1106, "step": 1835 }, { "epoch": 1.2719085555940421, "grad_norm": 1.0052287578582764, "learning_rate": 8.73370319001387e-06, "loss": 0.1063, "step": 1836 }, { "epoch": 1.2726013162452372, "grad_norm": 0.7611558437347412, "learning_rate": 8.733009708737865e-06, "loss": 0.09, "step": 1837 }, { "epoch": 1.2732940768964323, "grad_norm": 0.79737788438797, "learning_rate": 8.732316227461858e-06, "loss": 0.0907, "step": 1838 }, { "epoch": 1.2739868375476273, "grad_norm": 1.0511468648910522, "learning_rate": 8.731622746185853e-06, "loss": 0.1066, "step": 1839 }, { "epoch": 1.2746795981988224, "grad_norm": 0.8577345609664917, "learning_rate": 8.730929264909848e-06, "loss": 0.0945, "step": 1840 }, { "epoch": 1.2753723588500172, "grad_norm": 0.9274505972862244, "learning_rate": 8.730235783633843e-06, "loss": 0.0878, "step": 1841 }, { "epoch": 1.2760651195012123, "grad_norm": 0.8447585105895996, "learning_rate": 8.729542302357838e-06, "loss": 0.0964, "step": 1842 }, { "epoch": 1.2767578801524073, "grad_norm": 0.840035617351532, "learning_rate": 8.728848821081831e-06, "loss": 0.0912, "step": 1843 }, { "epoch": 1.2774506408036024, "grad_norm": 0.8527553677558899, "learning_rate": 8.728155339805826e-06, "loss": 0.0734, "step": 1844 }, { "epoch": 1.2781434014547974, "grad_norm": 0.9054072499275208, "learning_rate": 8.72746185852982e-06, "loss": 0.0971, "step": 1845 }, { "epoch": 1.2788361621059923, "grad_norm": 0.8647048473358154, "learning_rate": 8.726768377253814e-06, "loss": 0.0958, "step": 1846 }, { "epoch": 1.2795289227571873, "grad_norm": 0.7765918374061584, "learning_rate": 8.72607489597781e-06, "loss": 0.1003, "step": 1847 }, { "epoch": 1.2802216834083824, "grad_norm": 0.8595507144927979, "learning_rate": 8.725381414701804e-06, "loss": 0.0995, "step": 1848 }, { "epoch": 1.2809144440595774, "grad_norm": 0.9293921589851379, "learning_rate": 8.724687933425799e-06, "loss": 0.0875, "step": 1849 }, { "epoch": 1.2816072047107725, "grad_norm": 0.9870502352714539, "learning_rate": 8.723994452149792e-06, "loss": 0.1334, "step": 1850 }, { "epoch": 1.2822999653619673, "grad_norm": 0.8699167966842651, "learning_rate": 8.723300970873787e-06, "loss": 0.0938, "step": 1851 }, { "epoch": 1.2829927260131624, "grad_norm": 0.7957127690315247, "learning_rate": 8.722607489597782e-06, "loss": 0.0862, "step": 1852 }, { "epoch": 1.2836854866643574, "grad_norm": 0.8202260732650757, "learning_rate": 8.721914008321775e-06, "loss": 0.0869, "step": 1853 }, { "epoch": 1.2843782473155525, "grad_norm": 2.299309015274048, "learning_rate": 8.72122052704577e-06, "loss": 0.1076, "step": 1854 }, { "epoch": 1.2850710079667476, "grad_norm": 0.8096281290054321, "learning_rate": 8.720527045769764e-06, "loss": 0.0915, "step": 1855 }, { "epoch": 1.2857637686179424, "grad_norm": 1.0309594869613647, "learning_rate": 8.719833564493759e-06, "loss": 0.1019, "step": 1856 }, { "epoch": 1.2864565292691374, "grad_norm": 0.7998151183128357, "learning_rate": 8.719140083217753e-06, "loss": 0.1014, "step": 1857 }, { "epoch": 1.2871492899203325, "grad_norm": 0.8655368089675903, "learning_rate": 8.718446601941748e-06, "loss": 0.0925, "step": 1858 }, { "epoch": 1.2878420505715276, "grad_norm": 0.8410249948501587, "learning_rate": 8.717753120665743e-06, "loss": 0.0916, "step": 1859 }, { "epoch": 1.2885348112227226, "grad_norm": 0.7467215657234192, "learning_rate": 8.717059639389737e-06, "loss": 0.0817, "step": 1860 }, { "epoch": 1.2892275718739175, "grad_norm": 0.8425984978675842, "learning_rate": 8.716366158113731e-06, "loss": 0.07, "step": 1861 }, { "epoch": 1.2899203325251125, "grad_norm": 0.8508981466293335, "learning_rate": 8.715672676837726e-06, "loss": 0.0921, "step": 1862 }, { "epoch": 1.2906130931763076, "grad_norm": 0.7180240750312805, "learning_rate": 8.71497919556172e-06, "loss": 0.0877, "step": 1863 }, { "epoch": 1.2913058538275026, "grad_norm": 0.7329303026199341, "learning_rate": 8.714285714285715e-06, "loss": 0.0739, "step": 1864 }, { "epoch": 1.2919986144786977, "grad_norm": 0.8160582184791565, "learning_rate": 8.71359223300971e-06, "loss": 0.0893, "step": 1865 }, { "epoch": 1.2926913751298925, "grad_norm": 0.8358718752861023, "learning_rate": 8.712898751733704e-06, "loss": 0.1096, "step": 1866 }, { "epoch": 1.2933841357810876, "grad_norm": 0.7995262145996094, "learning_rate": 8.7122052704577e-06, "loss": 0.0938, "step": 1867 }, { "epoch": 1.2940768964322826, "grad_norm": 0.7863608598709106, "learning_rate": 8.711511789181693e-06, "loss": 0.0927, "step": 1868 }, { "epoch": 1.2947696570834777, "grad_norm": 0.8459818363189697, "learning_rate": 8.710818307905688e-06, "loss": 0.0969, "step": 1869 }, { "epoch": 1.2954624177346727, "grad_norm": 0.9080449342727661, "learning_rate": 8.71012482662968e-06, "loss": 0.1302, "step": 1870 }, { "epoch": 1.2961551783858676, "grad_norm": 0.7980386018753052, "learning_rate": 8.709431345353676e-06, "loss": 0.0839, "step": 1871 }, { "epoch": 1.2968479390370626, "grad_norm": 0.8454411625862122, "learning_rate": 8.70873786407767e-06, "loss": 0.1082, "step": 1872 }, { "epoch": 1.2975406996882577, "grad_norm": 0.8614441156387329, "learning_rate": 8.708044382801666e-06, "loss": 0.0903, "step": 1873 }, { "epoch": 1.2982334603394527, "grad_norm": 0.8343150615692139, "learning_rate": 8.707350901525659e-06, "loss": 0.1102, "step": 1874 }, { "epoch": 1.2989262209906478, "grad_norm": 0.8563399314880371, "learning_rate": 8.706657420249654e-06, "loss": 0.0959, "step": 1875 }, { "epoch": 1.2996189816418426, "grad_norm": 0.9160473942756653, "learning_rate": 8.705963938973649e-06, "loss": 0.1008, "step": 1876 }, { "epoch": 1.3003117422930377, "grad_norm": 0.8592052459716797, "learning_rate": 8.705270457697644e-06, "loss": 0.1026, "step": 1877 }, { "epoch": 1.3010045029442328, "grad_norm": 0.8042607307434082, "learning_rate": 8.704576976421637e-06, "loss": 0.0802, "step": 1878 }, { "epoch": 1.3016972635954278, "grad_norm": 0.939224362373352, "learning_rate": 8.703883495145632e-06, "loss": 0.1137, "step": 1879 }, { "epoch": 1.3023900242466229, "grad_norm": 0.7538774013519287, "learning_rate": 8.703190013869625e-06, "loss": 0.081, "step": 1880 }, { "epoch": 1.3030827848978177, "grad_norm": 0.8179598450660706, "learning_rate": 8.70249653259362e-06, "loss": 0.0962, "step": 1881 }, { "epoch": 1.3037755455490128, "grad_norm": 0.8050620555877686, "learning_rate": 8.701803051317615e-06, "loss": 0.091, "step": 1882 }, { "epoch": 1.3044683062002078, "grad_norm": 0.9653608798980713, "learning_rate": 8.70110957004161e-06, "loss": 0.125, "step": 1883 }, { "epoch": 1.3051610668514029, "grad_norm": 0.849090039730072, "learning_rate": 8.700416088765605e-06, "loss": 0.0961, "step": 1884 }, { "epoch": 1.305853827502598, "grad_norm": 0.8501788377761841, "learning_rate": 8.699722607489598e-06, "loss": 0.0986, "step": 1885 }, { "epoch": 1.3065465881537928, "grad_norm": 0.9050464630126953, "learning_rate": 8.699029126213593e-06, "loss": 0.1099, "step": 1886 }, { "epoch": 1.3072393488049878, "grad_norm": 0.9707657098770142, "learning_rate": 8.698335644937588e-06, "loss": 0.1288, "step": 1887 }, { "epoch": 1.3079321094561829, "grad_norm": 0.8001958131790161, "learning_rate": 8.697642163661581e-06, "loss": 0.0854, "step": 1888 }, { "epoch": 1.308624870107378, "grad_norm": 0.7052933573722839, "learning_rate": 8.696948682385576e-06, "loss": 0.0676, "step": 1889 }, { "epoch": 1.309317630758573, "grad_norm": 0.9062489867210388, "learning_rate": 8.696255201109571e-06, "loss": 0.0956, "step": 1890 }, { "epoch": 1.3100103914097678, "grad_norm": 0.8515408635139465, "learning_rate": 8.695561719833566e-06, "loss": 0.0821, "step": 1891 }, { "epoch": 1.3107031520609629, "grad_norm": 0.9760435819625854, "learning_rate": 8.69486823855756e-06, "loss": 0.1251, "step": 1892 }, { "epoch": 1.311395912712158, "grad_norm": 0.8155739307403564, "learning_rate": 8.694174757281554e-06, "loss": 0.0897, "step": 1893 }, { "epoch": 1.312088673363353, "grad_norm": 0.7818350791931152, "learning_rate": 8.693481276005549e-06, "loss": 0.0825, "step": 1894 }, { "epoch": 1.312781434014548, "grad_norm": 0.8299257159233093, "learning_rate": 8.692787794729542e-06, "loss": 0.0969, "step": 1895 }, { "epoch": 1.313474194665743, "grad_norm": 0.8357214331626892, "learning_rate": 8.692094313453537e-06, "loss": 0.106, "step": 1896 }, { "epoch": 1.314166955316938, "grad_norm": 0.8854610323905945, "learning_rate": 8.691400832177532e-06, "loss": 0.1114, "step": 1897 }, { "epoch": 1.314859715968133, "grad_norm": 0.9018477201461792, "learning_rate": 8.690707350901525e-06, "loss": 0.0933, "step": 1898 }, { "epoch": 1.315552476619328, "grad_norm": 0.7969275712966919, "learning_rate": 8.69001386962552e-06, "loss": 0.0951, "step": 1899 }, { "epoch": 1.3162452372705231, "grad_norm": 0.8710771799087524, "learning_rate": 8.689320388349515e-06, "loss": 0.1001, "step": 1900 }, { "epoch": 1.316937997921718, "grad_norm": 0.8591422438621521, "learning_rate": 8.68862690707351e-06, "loss": 0.1093, "step": 1901 }, { "epoch": 1.317630758572913, "grad_norm": 0.8055676817893982, "learning_rate": 8.687933425797505e-06, "loss": 0.092, "step": 1902 }, { "epoch": 1.318323519224108, "grad_norm": 0.7591870427131653, "learning_rate": 8.687239944521498e-06, "loss": 0.0896, "step": 1903 }, { "epoch": 1.3190162798753031, "grad_norm": 0.856926679611206, "learning_rate": 8.686546463245493e-06, "loss": 0.0985, "step": 1904 }, { "epoch": 1.3197090405264982, "grad_norm": 0.8678262829780579, "learning_rate": 8.685852981969487e-06, "loss": 0.1183, "step": 1905 }, { "epoch": 1.320401801177693, "grad_norm": 0.7745727300643921, "learning_rate": 8.685159500693481e-06, "loss": 0.0929, "step": 1906 }, { "epoch": 1.321094561828888, "grad_norm": 0.7514280676841736, "learning_rate": 8.684466019417476e-06, "loss": 0.0893, "step": 1907 }, { "epoch": 1.3217873224800831, "grad_norm": 0.877655029296875, "learning_rate": 8.683772538141471e-06, "loss": 0.1119, "step": 1908 }, { "epoch": 1.3224800831312782, "grad_norm": 0.8242598176002502, "learning_rate": 8.683079056865466e-06, "loss": 0.0996, "step": 1909 }, { "epoch": 1.3231728437824732, "grad_norm": 0.8473266363143921, "learning_rate": 8.68238557558946e-06, "loss": 0.1044, "step": 1910 }, { "epoch": 1.323865604433668, "grad_norm": 0.8577867746353149, "learning_rate": 8.681692094313454e-06, "loss": 0.108, "step": 1911 }, { "epoch": 1.3245583650848631, "grad_norm": 0.9692781567573547, "learning_rate": 8.68099861303745e-06, "loss": 0.09, "step": 1912 }, { "epoch": 1.3252511257360582, "grad_norm": 0.8671038746833801, "learning_rate": 8.680305131761443e-06, "loss": 0.1085, "step": 1913 }, { "epoch": 1.3259438863872532, "grad_norm": 0.8092488050460815, "learning_rate": 8.679611650485438e-06, "loss": 0.1093, "step": 1914 }, { "epoch": 1.3266366470384483, "grad_norm": 0.9651378989219666, "learning_rate": 8.67891816920943e-06, "loss": 0.1106, "step": 1915 }, { "epoch": 1.3273294076896431, "grad_norm": 0.7887718081474304, "learning_rate": 8.678224687933426e-06, "loss": 0.0955, "step": 1916 }, { "epoch": 1.3280221683408382, "grad_norm": 0.7525023818016052, "learning_rate": 8.67753120665742e-06, "loss": 0.0897, "step": 1917 }, { "epoch": 1.3287149289920333, "grad_norm": 0.7951431274414062, "learning_rate": 8.676837725381416e-06, "loss": 0.0851, "step": 1918 }, { "epoch": 1.3294076896432283, "grad_norm": 0.9328269362449646, "learning_rate": 8.67614424410541e-06, "loss": 0.1171, "step": 1919 }, { "epoch": 1.3301004502944234, "grad_norm": 0.7530510425567627, "learning_rate": 8.675450762829404e-06, "loss": 0.1047, "step": 1920 }, { "epoch": 1.3307932109456182, "grad_norm": 0.7813425660133362, "learning_rate": 8.674757281553399e-06, "loss": 0.0911, "step": 1921 }, { "epoch": 1.3314859715968133, "grad_norm": 0.7479113936424255, "learning_rate": 8.674063800277394e-06, "loss": 0.0886, "step": 1922 }, { "epoch": 1.3321787322480083, "grad_norm": 0.8434452414512634, "learning_rate": 8.673370319001387e-06, "loss": 0.0905, "step": 1923 }, { "epoch": 1.3328714928992034, "grad_norm": 1.0284795761108398, "learning_rate": 8.672676837725382e-06, "loss": 0.1008, "step": 1924 }, { "epoch": 1.3335642535503984, "grad_norm": 0.9190812706947327, "learning_rate": 8.671983356449377e-06, "loss": 0.1037, "step": 1925 }, { "epoch": 1.3342570142015933, "grad_norm": 0.7740709185600281, "learning_rate": 8.671289875173372e-06, "loss": 0.0791, "step": 1926 }, { "epoch": 1.3349497748527883, "grad_norm": 0.8993537425994873, "learning_rate": 8.670596393897367e-06, "loss": 0.1132, "step": 1927 }, { "epoch": 1.3356425355039834, "grad_norm": 0.6645535826683044, "learning_rate": 8.66990291262136e-06, "loss": 0.0593, "step": 1928 }, { "epoch": 1.3363352961551784, "grad_norm": 0.7269705533981323, "learning_rate": 8.669209431345355e-06, "loss": 0.0836, "step": 1929 }, { "epoch": 1.3370280568063735, "grad_norm": 0.8108569979667664, "learning_rate": 8.668515950069348e-06, "loss": 0.0859, "step": 1930 }, { "epoch": 1.3377208174575683, "grad_norm": 0.8316945433616638, "learning_rate": 8.667822468793343e-06, "loss": 0.0856, "step": 1931 }, { "epoch": 1.3384135781087634, "grad_norm": 0.923198401927948, "learning_rate": 8.667128987517338e-06, "loss": 0.1228, "step": 1932 }, { "epoch": 1.3391063387599584, "grad_norm": 0.8418177366256714, "learning_rate": 8.666435506241331e-06, "loss": 0.1091, "step": 1933 }, { "epoch": 1.3397990994111535, "grad_norm": 0.9837353825569153, "learning_rate": 8.665742024965326e-06, "loss": 0.1016, "step": 1934 }, { "epoch": 1.3404918600623486, "grad_norm": 0.9974249601364136, "learning_rate": 8.665048543689321e-06, "loss": 0.0982, "step": 1935 }, { "epoch": 1.3411846207135434, "grad_norm": 0.9031357169151306, "learning_rate": 8.664355062413316e-06, "loss": 0.0735, "step": 1936 }, { "epoch": 1.3418773813647384, "grad_norm": 0.9560807943344116, "learning_rate": 8.66366158113731e-06, "loss": 0.1073, "step": 1937 }, { "epoch": 1.3425701420159335, "grad_norm": 0.9520257115364075, "learning_rate": 8.662968099861304e-06, "loss": 0.0958, "step": 1938 }, { "epoch": 1.3432629026671286, "grad_norm": 0.8354464173316956, "learning_rate": 8.662274618585299e-06, "loss": 0.107, "step": 1939 }, { "epoch": 1.3439556633183236, "grad_norm": 0.8196549415588379, "learning_rate": 8.661581137309292e-06, "loss": 0.0941, "step": 1940 }, { "epoch": 1.3446484239695184, "grad_norm": 0.749197244644165, "learning_rate": 8.660887656033287e-06, "loss": 0.0886, "step": 1941 }, { "epoch": 1.3453411846207135, "grad_norm": 0.8296682238578796, "learning_rate": 8.660194174757282e-06, "loss": 0.0855, "step": 1942 }, { "epoch": 1.3460339452719086, "grad_norm": 1.4121226072311401, "learning_rate": 8.659500693481277e-06, "loss": 0.1187, "step": 1943 }, { "epoch": 1.3467267059231036, "grad_norm": 0.8417717218399048, "learning_rate": 8.658807212205272e-06, "loss": 0.095, "step": 1944 }, { "epoch": 1.3474194665742987, "grad_norm": 0.8696645498275757, "learning_rate": 8.658113730929265e-06, "loss": 0.0884, "step": 1945 }, { "epoch": 1.3481122272254935, "grad_norm": 0.831957221031189, "learning_rate": 8.65742024965326e-06, "loss": 0.0978, "step": 1946 }, { "epoch": 1.3488049878766886, "grad_norm": 0.8580504655838013, "learning_rate": 8.656726768377255e-06, "loss": 0.0918, "step": 1947 }, { "epoch": 1.3494977485278836, "grad_norm": 0.7921102046966553, "learning_rate": 8.656033287101248e-06, "loss": 0.0804, "step": 1948 }, { "epoch": 1.3501905091790787, "grad_norm": 0.9082995653152466, "learning_rate": 8.655339805825243e-06, "loss": 0.0988, "step": 1949 }, { "epoch": 1.3508832698302737, "grad_norm": 0.8123347163200378, "learning_rate": 8.654646324549238e-06, "loss": 0.0859, "step": 1950 }, { "epoch": 1.3515760304814686, "grad_norm": 0.8211560845375061, "learning_rate": 8.653952843273231e-06, "loss": 0.09, "step": 1951 }, { "epoch": 1.3522687911326636, "grad_norm": 0.909602701663971, "learning_rate": 8.653259361997226e-06, "loss": 0.0996, "step": 1952 }, { "epoch": 1.3529615517838587, "grad_norm": 0.8661556243896484, "learning_rate": 8.652565880721221e-06, "loss": 0.098, "step": 1953 }, { "epoch": 1.3536543124350537, "grad_norm": 0.8467791080474854, "learning_rate": 8.651872399445216e-06, "loss": 0.0993, "step": 1954 }, { "epoch": 1.3543470730862488, "grad_norm": 0.8431129455566406, "learning_rate": 8.65117891816921e-06, "loss": 0.1165, "step": 1955 }, { "epoch": 1.3550398337374436, "grad_norm": 0.839606761932373, "learning_rate": 8.650485436893204e-06, "loss": 0.1, "step": 1956 }, { "epoch": 1.3557325943886387, "grad_norm": 0.8434377908706665, "learning_rate": 8.6497919556172e-06, "loss": 0.103, "step": 1957 }, { "epoch": 1.3564253550398337, "grad_norm": 0.9461360573768616, "learning_rate": 8.649098474341193e-06, "loss": 0.1179, "step": 1958 }, { "epoch": 1.3571181156910288, "grad_norm": 0.9228819608688354, "learning_rate": 8.648404993065188e-06, "loss": 0.1052, "step": 1959 }, { "epoch": 1.3578108763422239, "grad_norm": 0.8292375802993774, "learning_rate": 8.647711511789182e-06, "loss": 0.1027, "step": 1960 }, { "epoch": 1.3585036369934187, "grad_norm": 0.8803178668022156, "learning_rate": 8.647018030513177e-06, "loss": 0.1017, "step": 1961 }, { "epoch": 1.3591963976446138, "grad_norm": 0.810207724571228, "learning_rate": 8.646324549237172e-06, "loss": 0.0876, "step": 1962 }, { "epoch": 1.3598891582958088, "grad_norm": 0.8565738201141357, "learning_rate": 8.645631067961166e-06, "loss": 0.0985, "step": 1963 }, { "epoch": 1.3605819189470039, "grad_norm": 0.7794263362884521, "learning_rate": 8.64493758668516e-06, "loss": 0.0935, "step": 1964 }, { "epoch": 1.361274679598199, "grad_norm": 0.8222514390945435, "learning_rate": 8.644244105409154e-06, "loss": 0.0927, "step": 1965 }, { "epoch": 1.3619674402493938, "grad_norm": 0.8469381332397461, "learning_rate": 8.643550624133149e-06, "loss": 0.0836, "step": 1966 }, { "epoch": 1.3626602009005888, "grad_norm": 0.7348058223724365, "learning_rate": 8.642857142857144e-06, "loss": 0.0902, "step": 1967 }, { "epoch": 1.3633529615517839, "grad_norm": 0.8116043210029602, "learning_rate": 8.642163661581139e-06, "loss": 0.0962, "step": 1968 }, { "epoch": 1.364045722202979, "grad_norm": 0.786858081817627, "learning_rate": 8.641470180305133e-06, "loss": 0.0861, "step": 1969 }, { "epoch": 1.364738482854174, "grad_norm": 0.9200820326805115, "learning_rate": 8.640776699029127e-06, "loss": 0.1187, "step": 1970 }, { "epoch": 1.3654312435053688, "grad_norm": 0.8893924951553345, "learning_rate": 8.640083217753122e-06, "loss": 0.1166, "step": 1971 }, { "epoch": 1.3661240041565639, "grad_norm": 0.9585806727409363, "learning_rate": 8.639389736477117e-06, "loss": 0.1124, "step": 1972 }, { "epoch": 1.366816764807759, "grad_norm": 0.9382540583610535, "learning_rate": 8.63869625520111e-06, "loss": 0.1135, "step": 1973 }, { "epoch": 1.367509525458954, "grad_norm": 0.8802511096000671, "learning_rate": 8.638002773925105e-06, "loss": 0.089, "step": 1974 }, { "epoch": 1.368202286110149, "grad_norm": 0.8175269365310669, "learning_rate": 8.637309292649098e-06, "loss": 0.116, "step": 1975 }, { "epoch": 1.3688950467613439, "grad_norm": 0.9394797682762146, "learning_rate": 8.636615811373093e-06, "loss": 0.0898, "step": 1976 }, { "epoch": 1.369587807412539, "grad_norm": 0.8105082511901855, "learning_rate": 8.635922330097088e-06, "loss": 0.0894, "step": 1977 }, { "epoch": 1.370280568063734, "grad_norm": 0.9872980713844299, "learning_rate": 8.635228848821083e-06, "loss": 0.0954, "step": 1978 }, { "epoch": 1.370973328714929, "grad_norm": 0.8237231373786926, "learning_rate": 8.634535367545078e-06, "loss": 0.1074, "step": 1979 }, { "epoch": 1.371666089366124, "grad_norm": 0.9056423902511597, "learning_rate": 8.633841886269071e-06, "loss": 0.0867, "step": 1980 }, { "epoch": 1.372358850017319, "grad_norm": 0.8608441948890686, "learning_rate": 8.633148404993066e-06, "loss": 0.089, "step": 1981 }, { "epoch": 1.373051610668514, "grad_norm": 0.8657476902008057, "learning_rate": 8.63245492371706e-06, "loss": 0.0989, "step": 1982 }, { "epoch": 1.373744371319709, "grad_norm": 0.8708510994911194, "learning_rate": 8.631761442441054e-06, "loss": 0.094, "step": 1983 }, { "epoch": 1.3744371319709041, "grad_norm": 0.9043082594871521, "learning_rate": 8.631067961165049e-06, "loss": 0.104, "step": 1984 }, { "epoch": 1.3751298926220992, "grad_norm": 0.8037663102149963, "learning_rate": 8.630374479889044e-06, "loss": 0.0807, "step": 1985 }, { "epoch": 1.375822653273294, "grad_norm": 0.8416350483894348, "learning_rate": 8.629680998613039e-06, "loss": 0.0993, "step": 1986 }, { "epoch": 1.376515413924489, "grad_norm": 0.7926865816116333, "learning_rate": 8.628987517337034e-06, "loss": 0.0933, "step": 1987 }, { "epoch": 1.3772081745756841, "grad_norm": 0.8975657820701599, "learning_rate": 8.628294036061027e-06, "loss": 0.0985, "step": 1988 }, { "epoch": 1.3779009352268792, "grad_norm": 0.9110676050186157, "learning_rate": 8.627600554785022e-06, "loss": 0.0944, "step": 1989 }, { "epoch": 1.3785936958780742, "grad_norm": 0.9367595911026001, "learning_rate": 8.626907073509015e-06, "loss": 0.0975, "step": 1990 }, { "epoch": 1.379286456529269, "grad_norm": 0.7948849201202393, "learning_rate": 8.62621359223301e-06, "loss": 0.0892, "step": 1991 }, { "epoch": 1.3799792171804641, "grad_norm": 0.8453758358955383, "learning_rate": 8.625520110957005e-06, "loss": 0.1105, "step": 1992 }, { "epoch": 1.3806719778316592, "grad_norm": 0.864433228969574, "learning_rate": 8.624826629680998e-06, "loss": 0.1162, "step": 1993 }, { "epoch": 1.3813647384828542, "grad_norm": 0.8461838960647583, "learning_rate": 8.624133148404993e-06, "loss": 0.0974, "step": 1994 }, { "epoch": 1.3820574991340493, "grad_norm": 0.9221845269203186, "learning_rate": 8.623439667128988e-06, "loss": 0.0949, "step": 1995 }, { "epoch": 1.3827502597852441, "grad_norm": 0.7963080406188965, "learning_rate": 8.622746185852983e-06, "loss": 0.0912, "step": 1996 }, { "epoch": 1.3834430204364392, "grad_norm": 0.7928188443183899, "learning_rate": 8.622052704576978e-06, "loss": 0.098, "step": 1997 }, { "epoch": 1.3841357810876342, "grad_norm": 0.7823619842529297, "learning_rate": 8.621359223300971e-06, "loss": 0.0901, "step": 1998 }, { "epoch": 1.3848285417388293, "grad_norm": 1.0484232902526855, "learning_rate": 8.620665742024966e-06, "loss": 0.1024, "step": 1999 }, { "epoch": 1.3855213023900244, "grad_norm": 0.9590020775794983, "learning_rate": 8.61997226074896e-06, "loss": 0.1206, "step": 2000 }, { "epoch": 1.3862140630412192, "grad_norm": 0.8306148648262024, "learning_rate": 8.619278779472954e-06, "loss": 0.0977, "step": 2001 }, { "epoch": 1.3869068236924142, "grad_norm": 0.7876715064048767, "learning_rate": 8.61858529819695e-06, "loss": 0.0891, "step": 2002 }, { "epoch": 1.3875995843436093, "grad_norm": 1.0013034343719482, "learning_rate": 8.617891816920944e-06, "loss": 0.1069, "step": 2003 }, { "epoch": 1.3882923449948044, "grad_norm": 0.7736858129501343, "learning_rate": 8.61719833564494e-06, "loss": 0.0832, "step": 2004 }, { "epoch": 1.3889851056459994, "grad_norm": 0.7988465428352356, "learning_rate": 8.616504854368932e-06, "loss": 0.0932, "step": 2005 }, { "epoch": 1.3896778662971943, "grad_norm": 0.8956063389778137, "learning_rate": 8.615811373092927e-06, "loss": 0.1014, "step": 2006 }, { "epoch": 1.3903706269483893, "grad_norm": 0.8395360112190247, "learning_rate": 8.615117891816922e-06, "loss": 0.1008, "step": 2007 }, { "epoch": 1.3910633875995844, "grad_norm": 0.79246985912323, "learning_rate": 8.614424410540916e-06, "loss": 0.0706, "step": 2008 }, { "epoch": 1.3917561482507794, "grad_norm": 0.8563838601112366, "learning_rate": 8.61373092926491e-06, "loss": 0.1176, "step": 2009 }, { "epoch": 1.3924489089019745, "grad_norm": 1.1236284971237183, "learning_rate": 8.613037447988904e-06, "loss": 0.1097, "step": 2010 }, { "epoch": 1.3931416695531693, "grad_norm": 0.8628478646278381, "learning_rate": 8.612343966712899e-06, "loss": 0.0891, "step": 2011 }, { "epoch": 1.3938344302043644, "grad_norm": 0.8011003732681274, "learning_rate": 8.611650485436894e-06, "loss": 0.0916, "step": 2012 }, { "epoch": 1.3945271908555594, "grad_norm": 0.8925850987434387, "learning_rate": 8.610957004160889e-06, "loss": 0.0998, "step": 2013 }, { "epoch": 1.3952199515067545, "grad_norm": 0.8047410249710083, "learning_rate": 8.610263522884883e-06, "loss": 0.1053, "step": 2014 }, { "epoch": 1.3959127121579495, "grad_norm": 0.7683031558990479, "learning_rate": 8.609570041608877e-06, "loss": 0.0925, "step": 2015 }, { "epoch": 1.3966054728091444, "grad_norm": 0.7874264717102051, "learning_rate": 8.608876560332872e-06, "loss": 0.0862, "step": 2016 }, { "epoch": 1.3972982334603394, "grad_norm": 0.7253385186195374, "learning_rate": 8.608183079056867e-06, "loss": 0.0706, "step": 2017 }, { "epoch": 1.3979909941115345, "grad_norm": 0.7991751432418823, "learning_rate": 8.60748959778086e-06, "loss": 0.1014, "step": 2018 }, { "epoch": 1.3986837547627295, "grad_norm": 0.7393742203712463, "learning_rate": 8.606796116504855e-06, "loss": 0.0831, "step": 2019 }, { "epoch": 1.3993765154139246, "grad_norm": 0.8541518449783325, "learning_rate": 8.60610263522885e-06, "loss": 0.0957, "step": 2020 }, { "epoch": 1.4000692760651194, "grad_norm": 0.8139171600341797, "learning_rate": 8.605409153952845e-06, "loss": 0.0888, "step": 2021 }, { "epoch": 1.4007620367163145, "grad_norm": 0.8328967094421387, "learning_rate": 8.60471567267684e-06, "loss": 0.0974, "step": 2022 }, { "epoch": 1.4014547973675096, "grad_norm": 0.7323856949806213, "learning_rate": 8.604022191400833e-06, "loss": 0.0915, "step": 2023 }, { "epoch": 1.4021475580187046, "grad_norm": 0.8023518919944763, "learning_rate": 8.603328710124828e-06, "loss": 0.0882, "step": 2024 }, { "epoch": 1.4028403186698997, "grad_norm": 0.793217122554779, "learning_rate": 8.602635228848821e-06, "loss": 0.0768, "step": 2025 }, { "epoch": 1.4035330793210945, "grad_norm": 0.8914885520935059, "learning_rate": 8.601941747572816e-06, "loss": 0.0755, "step": 2026 }, { "epoch": 1.4042258399722896, "grad_norm": 0.8099649548530579, "learning_rate": 8.60124826629681e-06, "loss": 0.0913, "step": 2027 }, { "epoch": 1.4049186006234846, "grad_norm": 0.8921623229980469, "learning_rate": 8.600554785020804e-06, "loss": 0.0978, "step": 2028 }, { "epoch": 1.4056113612746797, "grad_norm": 0.8204141855239868, "learning_rate": 8.599861303744799e-06, "loss": 0.094, "step": 2029 }, { "epoch": 1.4063041219258747, "grad_norm": 0.9586772918701172, "learning_rate": 8.599167822468794e-06, "loss": 0.1301, "step": 2030 }, { "epoch": 1.4069968825770696, "grad_norm": 0.9210357666015625, "learning_rate": 8.598474341192789e-06, "loss": 0.1078, "step": 2031 }, { "epoch": 1.4076896432282646, "grad_norm": 0.8714752793312073, "learning_rate": 8.597780859916784e-06, "loss": 0.0954, "step": 2032 }, { "epoch": 1.4083824038794597, "grad_norm": 0.75909423828125, "learning_rate": 8.597087378640777e-06, "loss": 0.0835, "step": 2033 }, { "epoch": 1.4090751645306547, "grad_norm": 1.0207693576812744, "learning_rate": 8.596393897364772e-06, "loss": 0.1364, "step": 2034 }, { "epoch": 1.4097679251818498, "grad_norm": 0.7902257442474365, "learning_rate": 8.595700416088765e-06, "loss": 0.0804, "step": 2035 }, { "epoch": 1.4104606858330446, "grad_norm": 0.8312771916389465, "learning_rate": 8.59500693481276e-06, "loss": 0.1076, "step": 2036 }, { "epoch": 1.4111534464842397, "grad_norm": 0.8642279505729675, "learning_rate": 8.594313453536755e-06, "loss": 0.087, "step": 2037 }, { "epoch": 1.4118462071354347, "grad_norm": 3.088615894317627, "learning_rate": 8.59361997226075e-06, "loss": 0.1102, "step": 2038 }, { "epoch": 1.4125389677866298, "grad_norm": 0.8367988467216492, "learning_rate": 8.592926490984745e-06, "loss": 0.1022, "step": 2039 }, { "epoch": 1.4132317284378249, "grad_norm": 0.8424378633499146, "learning_rate": 8.592233009708738e-06, "loss": 0.1064, "step": 2040 }, { "epoch": 1.4139244890890197, "grad_norm": 0.942853569984436, "learning_rate": 8.591539528432733e-06, "loss": 0.1177, "step": 2041 }, { "epoch": 1.4146172497402147, "grad_norm": 0.8797265887260437, "learning_rate": 8.590846047156728e-06, "loss": 0.0967, "step": 2042 }, { "epoch": 1.4153100103914098, "grad_norm": 0.7765311002731323, "learning_rate": 8.590152565880721e-06, "loss": 0.0707, "step": 2043 }, { "epoch": 1.4160027710426049, "grad_norm": 0.873090922832489, "learning_rate": 8.589459084604716e-06, "loss": 0.0938, "step": 2044 }, { "epoch": 1.4166955316938, "grad_norm": 0.9923801422119141, "learning_rate": 8.588765603328711e-06, "loss": 0.1312, "step": 2045 }, { "epoch": 1.4173882923449947, "grad_norm": 0.9018128514289856, "learning_rate": 8.588072122052706e-06, "loss": 0.0949, "step": 2046 }, { "epoch": 1.4180810529961898, "grad_norm": 0.799936056137085, "learning_rate": 8.5873786407767e-06, "loss": 0.0841, "step": 2047 }, { "epoch": 1.4187738136473849, "grad_norm": 0.789219856262207, "learning_rate": 8.586685159500694e-06, "loss": 0.094, "step": 2048 }, { "epoch": 1.41946657429858, "grad_norm": 0.8584476113319397, "learning_rate": 8.58599167822469e-06, "loss": 0.0955, "step": 2049 }, { "epoch": 1.420159334949775, "grad_norm": 0.7601639032363892, "learning_rate": 8.585298196948682e-06, "loss": 0.0789, "step": 2050 }, { "epoch": 1.4208520956009698, "grad_norm": 0.8748162984848022, "learning_rate": 8.584604715672677e-06, "loss": 0.0976, "step": 2051 }, { "epoch": 1.4215448562521649, "grad_norm": 0.8095703125, "learning_rate": 8.583911234396672e-06, "loss": 0.1005, "step": 2052 }, { "epoch": 1.42223761690336, "grad_norm": 0.8859317302703857, "learning_rate": 8.583217753120666e-06, "loss": 0.1006, "step": 2053 }, { "epoch": 1.422930377554555, "grad_norm": 0.790725827217102, "learning_rate": 8.58252427184466e-06, "loss": 0.0951, "step": 2054 }, { "epoch": 1.42362313820575, "grad_norm": 0.7737709879875183, "learning_rate": 8.581830790568655e-06, "loss": 0.0848, "step": 2055 }, { "epoch": 1.4243158988569449, "grad_norm": 0.7824344038963318, "learning_rate": 8.58113730929265e-06, "loss": 0.0911, "step": 2056 }, { "epoch": 1.42500865950814, "grad_norm": 0.8980814814567566, "learning_rate": 8.580443828016645e-06, "loss": 0.1004, "step": 2057 }, { "epoch": 1.425701420159335, "grad_norm": 0.8582302927970886, "learning_rate": 8.579750346740638e-06, "loss": 0.1103, "step": 2058 }, { "epoch": 1.42639418081053, "grad_norm": 0.9365726113319397, "learning_rate": 8.579056865464633e-06, "loss": 0.1141, "step": 2059 }, { "epoch": 1.427086941461725, "grad_norm": 0.856505811214447, "learning_rate": 8.578363384188627e-06, "loss": 0.1085, "step": 2060 }, { "epoch": 1.42777970211292, "grad_norm": 1.071853756904602, "learning_rate": 8.577669902912622e-06, "loss": 0.1094, "step": 2061 }, { "epoch": 1.428472462764115, "grad_norm": 0.9276185035705566, "learning_rate": 8.576976421636617e-06, "loss": 0.116, "step": 2062 }, { "epoch": 1.42916522341531, "grad_norm": 0.8712035417556763, "learning_rate": 8.576282940360611e-06, "loss": 0.097, "step": 2063 }, { "epoch": 1.429857984066505, "grad_norm": 0.7674479484558105, "learning_rate": 8.575589459084606e-06, "loss": 0.093, "step": 2064 }, { "epoch": 1.4305507447177002, "grad_norm": 1.0586395263671875, "learning_rate": 8.5748959778086e-06, "loss": 0.1034, "step": 2065 }, { "epoch": 1.431243505368895, "grad_norm": 0.8353042602539062, "learning_rate": 8.574202496532595e-06, "loss": 0.1023, "step": 2066 }, { "epoch": 1.43193626602009, "grad_norm": 0.8541135191917419, "learning_rate": 8.57350901525659e-06, "loss": 0.1047, "step": 2067 }, { "epoch": 1.432629026671285, "grad_norm": 0.7746983766555786, "learning_rate": 8.572815533980583e-06, "loss": 0.0871, "step": 2068 }, { "epoch": 1.43332178732248, "grad_norm": 0.854115903377533, "learning_rate": 8.572122052704578e-06, "loss": 0.0951, "step": 2069 }, { "epoch": 1.4340145479736752, "grad_norm": 0.8405554890632629, "learning_rate": 8.571428571428571e-06, "loss": 0.0961, "step": 2070 }, { "epoch": 1.43470730862487, "grad_norm": 0.8547260165214539, "learning_rate": 8.570735090152566e-06, "loss": 0.0731, "step": 2071 }, { "epoch": 1.4354000692760651, "grad_norm": 0.930776834487915, "learning_rate": 8.57004160887656e-06, "loss": 0.0952, "step": 2072 }, { "epoch": 1.4360928299272602, "grad_norm": 0.8687021136283875, "learning_rate": 8.569348127600556e-06, "loss": 0.084, "step": 2073 }, { "epoch": 1.436785590578455, "grad_norm": 0.8948690891265869, "learning_rate": 8.56865464632455e-06, "loss": 0.0963, "step": 2074 }, { "epoch": 1.4374783512296503, "grad_norm": 0.7903040051460266, "learning_rate": 8.567961165048544e-06, "loss": 0.0876, "step": 2075 }, { "epoch": 1.4381711118808451, "grad_norm": 0.8380088210105896, "learning_rate": 8.567267683772539e-06, "loss": 0.1008, "step": 2076 }, { "epoch": 1.4388638725320402, "grad_norm": 0.8036629557609558, "learning_rate": 8.566574202496534e-06, "loss": 0.0767, "step": 2077 }, { "epoch": 1.4395566331832352, "grad_norm": 0.8765242695808411, "learning_rate": 8.565880721220527e-06, "loss": 0.0879, "step": 2078 }, { "epoch": 1.44024939383443, "grad_norm": 0.9223285913467407, "learning_rate": 8.565187239944522e-06, "loss": 0.1053, "step": 2079 }, { "epoch": 1.4409421544856253, "grad_norm": 0.8735775351524353, "learning_rate": 8.564493758668517e-06, "loss": 0.0936, "step": 2080 }, { "epoch": 1.4416349151368202, "grad_norm": 0.9525173306465149, "learning_rate": 8.563800277392512e-06, "loss": 0.09, "step": 2081 }, { "epoch": 1.4423276757880152, "grad_norm": 0.8726947903633118, "learning_rate": 8.563106796116507e-06, "loss": 0.096, "step": 2082 }, { "epoch": 1.4430204364392103, "grad_norm": 0.9291168451309204, "learning_rate": 8.5624133148405e-06, "loss": 0.1142, "step": 2083 }, { "epoch": 1.4437131970904051, "grad_norm": 0.8167150616645813, "learning_rate": 8.561719833564495e-06, "loss": 0.081, "step": 2084 }, { "epoch": 1.4444059577416004, "grad_norm": 0.8544431328773499, "learning_rate": 8.561026352288488e-06, "loss": 0.0983, "step": 2085 }, { "epoch": 1.4450987183927952, "grad_norm": 0.8344400525093079, "learning_rate": 8.560332871012483e-06, "loss": 0.0905, "step": 2086 }, { "epoch": 1.4457914790439903, "grad_norm": 0.948971688747406, "learning_rate": 8.559639389736478e-06, "loss": 0.1133, "step": 2087 }, { "epoch": 1.4464842396951854, "grad_norm": 0.9090603590011597, "learning_rate": 8.558945908460471e-06, "loss": 0.0923, "step": 2088 }, { "epoch": 1.4471770003463802, "grad_norm": 0.9278730154037476, "learning_rate": 8.558252427184466e-06, "loss": 0.1105, "step": 2089 }, { "epoch": 1.4478697609975755, "grad_norm": 1.03156316280365, "learning_rate": 8.557558945908461e-06, "loss": 0.1029, "step": 2090 }, { "epoch": 1.4485625216487703, "grad_norm": 0.8852841258049011, "learning_rate": 8.556865464632456e-06, "loss": 0.0975, "step": 2091 }, { "epoch": 1.4492552822999654, "grad_norm": 0.9188801646232605, "learning_rate": 8.556171983356451e-06, "loss": 0.1096, "step": 2092 }, { "epoch": 1.4499480429511604, "grad_norm": 0.7110975980758667, "learning_rate": 8.555478502080444e-06, "loss": 0.0815, "step": 2093 }, { "epoch": 1.4506408036023553, "grad_norm": 0.8940724730491638, "learning_rate": 8.55478502080444e-06, "loss": 0.099, "step": 2094 }, { "epoch": 1.4513335642535505, "grad_norm": 0.8253915905952454, "learning_rate": 8.554091539528432e-06, "loss": 0.0921, "step": 2095 }, { "epoch": 1.4520263249047454, "grad_norm": 0.7639085650444031, "learning_rate": 8.553398058252427e-06, "loss": 0.0818, "step": 2096 }, { "epoch": 1.4527190855559404, "grad_norm": 0.915162205696106, "learning_rate": 8.552704576976422e-06, "loss": 0.1126, "step": 2097 }, { "epoch": 1.4534118462071355, "grad_norm": 1.0026566982269287, "learning_rate": 8.552011095700417e-06, "loss": 0.1012, "step": 2098 }, { "epoch": 1.4541046068583303, "grad_norm": 0.8221924304962158, "learning_rate": 8.551317614424412e-06, "loss": 0.1048, "step": 2099 }, { "epoch": 1.4547973675095256, "grad_norm": 0.7636975646018982, "learning_rate": 8.550624133148405e-06, "loss": 0.0805, "step": 2100 }, { "epoch": 1.4554901281607204, "grad_norm": 0.847546398639679, "learning_rate": 8.5499306518724e-06, "loss": 0.1076, "step": 2101 }, { "epoch": 1.4561828888119155, "grad_norm": 0.8234090805053711, "learning_rate": 8.549237170596395e-06, "loss": 0.1032, "step": 2102 }, { "epoch": 1.4568756494631105, "grad_norm": 0.7470714449882507, "learning_rate": 8.548543689320388e-06, "loss": 0.0882, "step": 2103 }, { "epoch": 1.4575684101143054, "grad_norm": 0.9202353954315186, "learning_rate": 8.547850208044383e-06, "loss": 0.103, "step": 2104 }, { "epoch": 1.4582611707655007, "grad_norm": 0.9301064014434814, "learning_rate": 8.547156726768377e-06, "loss": 0.1181, "step": 2105 }, { "epoch": 1.4589539314166955, "grad_norm": 0.8707886934280396, "learning_rate": 8.546463245492372e-06, "loss": 0.0964, "step": 2106 }, { "epoch": 1.4596466920678905, "grad_norm": 0.8569350242614746, "learning_rate": 8.545769764216367e-06, "loss": 0.0858, "step": 2107 }, { "epoch": 1.4603394527190856, "grad_norm": 0.8189959526062012, "learning_rate": 8.545076282940361e-06, "loss": 0.0883, "step": 2108 }, { "epoch": 1.4610322133702804, "grad_norm": 0.8569098114967346, "learning_rate": 8.544382801664356e-06, "loss": 0.0987, "step": 2109 }, { "epoch": 1.4617249740214755, "grad_norm": 0.8092207908630371, "learning_rate": 8.54368932038835e-06, "loss": 0.094, "step": 2110 }, { "epoch": 1.4624177346726706, "grad_norm": 0.8205234408378601, "learning_rate": 8.542995839112345e-06, "loss": 0.0856, "step": 2111 }, { "epoch": 1.4631104953238656, "grad_norm": 0.9704433083534241, "learning_rate": 8.54230235783634e-06, "loss": 0.1348, "step": 2112 }, { "epoch": 1.4638032559750607, "grad_norm": 0.8799313902854919, "learning_rate": 8.541608876560333e-06, "loss": 0.0922, "step": 2113 }, { "epoch": 1.4644960166262555, "grad_norm": 1.072108507156372, "learning_rate": 8.540915395284328e-06, "loss": 0.0948, "step": 2114 }, { "epoch": 1.4651887772774506, "grad_norm": 0.8269270658493042, "learning_rate": 8.540221914008323e-06, "loss": 0.0901, "step": 2115 }, { "epoch": 1.4658815379286456, "grad_norm": 0.9558659195899963, "learning_rate": 8.539528432732318e-06, "loss": 0.0904, "step": 2116 }, { "epoch": 1.4665742985798407, "grad_norm": 0.8863042593002319, "learning_rate": 8.538834951456312e-06, "loss": 0.0976, "step": 2117 }, { "epoch": 1.4672670592310357, "grad_norm": 0.8317894339561462, "learning_rate": 8.538141470180306e-06, "loss": 0.0847, "step": 2118 }, { "epoch": 1.4679598198822306, "grad_norm": 1.0261125564575195, "learning_rate": 8.5374479889043e-06, "loss": 0.1116, "step": 2119 }, { "epoch": 1.4686525805334256, "grad_norm": 0.8076496720314026, "learning_rate": 8.536754507628294e-06, "loss": 0.0839, "step": 2120 }, { "epoch": 1.4693453411846207, "grad_norm": 0.8292192220687866, "learning_rate": 8.536061026352289e-06, "loss": 0.0897, "step": 2121 }, { "epoch": 1.4700381018358157, "grad_norm": 0.8142775893211365, "learning_rate": 8.535367545076284e-06, "loss": 0.0925, "step": 2122 }, { "epoch": 1.4707308624870108, "grad_norm": 0.844031810760498, "learning_rate": 8.534674063800279e-06, "loss": 0.105, "step": 2123 }, { "epoch": 1.4714236231382056, "grad_norm": 0.8712015748023987, "learning_rate": 8.533980582524272e-06, "loss": 0.1166, "step": 2124 }, { "epoch": 1.4721163837894007, "grad_norm": 0.89336097240448, "learning_rate": 8.533287101248267e-06, "loss": 0.1108, "step": 2125 }, { "epoch": 1.4728091444405957, "grad_norm": 0.8269360065460205, "learning_rate": 8.532593619972262e-06, "loss": 0.0826, "step": 2126 }, { "epoch": 1.4735019050917908, "grad_norm": 0.8392295837402344, "learning_rate": 8.531900138696257e-06, "loss": 0.0989, "step": 2127 }, { "epoch": 1.4741946657429859, "grad_norm": 0.8825324773788452, "learning_rate": 8.53120665742025e-06, "loss": 0.0998, "step": 2128 }, { "epoch": 1.4748874263941807, "grad_norm": 0.8092551231384277, "learning_rate": 8.530513176144245e-06, "loss": 0.0932, "step": 2129 }, { "epoch": 1.4755801870453757, "grad_norm": 0.8736479878425598, "learning_rate": 8.529819694868238e-06, "loss": 0.0914, "step": 2130 }, { "epoch": 1.4762729476965708, "grad_norm": 0.8300082683563232, "learning_rate": 8.529126213592233e-06, "loss": 0.0916, "step": 2131 }, { "epoch": 1.4769657083477659, "grad_norm": 0.8586478233337402, "learning_rate": 8.528432732316228e-06, "loss": 0.0913, "step": 2132 }, { "epoch": 1.477658468998961, "grad_norm": 0.935139536857605, "learning_rate": 8.527739251040223e-06, "loss": 0.126, "step": 2133 }, { "epoch": 1.4783512296501558, "grad_norm": 0.8662792444229126, "learning_rate": 8.527045769764218e-06, "loss": 0.094, "step": 2134 }, { "epoch": 1.4790439903013508, "grad_norm": 0.8421633839607239, "learning_rate": 8.526352288488211e-06, "loss": 0.097, "step": 2135 }, { "epoch": 1.4797367509525459, "grad_norm": 0.8011019229888916, "learning_rate": 8.525658807212206e-06, "loss": 0.1083, "step": 2136 }, { "epoch": 1.480429511603741, "grad_norm": 0.7471836805343628, "learning_rate": 8.524965325936201e-06, "loss": 0.0688, "step": 2137 }, { "epoch": 1.481122272254936, "grad_norm": 0.8931064009666443, "learning_rate": 8.524271844660194e-06, "loss": 0.101, "step": 2138 }, { "epoch": 1.4818150329061308, "grad_norm": 0.8537425994873047, "learning_rate": 8.523578363384189e-06, "loss": 0.0788, "step": 2139 }, { "epoch": 1.4825077935573259, "grad_norm": 0.8899325728416443, "learning_rate": 8.522884882108184e-06, "loss": 0.0986, "step": 2140 }, { "epoch": 1.483200554208521, "grad_norm": 0.864824652671814, "learning_rate": 8.522191400832179e-06, "loss": 0.0884, "step": 2141 }, { "epoch": 1.483893314859716, "grad_norm": 0.8555802702903748, "learning_rate": 8.521497919556174e-06, "loss": 0.1002, "step": 2142 }, { "epoch": 1.484586075510911, "grad_norm": 0.8407396674156189, "learning_rate": 8.520804438280167e-06, "loss": 0.0942, "step": 2143 }, { "epoch": 1.4852788361621059, "grad_norm": 0.8692257404327393, "learning_rate": 8.520110957004162e-06, "loss": 0.1022, "step": 2144 }, { "epoch": 1.485971596813301, "grad_norm": 0.9186396598815918, "learning_rate": 8.519417475728155e-06, "loss": 0.0994, "step": 2145 }, { "epoch": 1.486664357464496, "grad_norm": 0.8801345825195312, "learning_rate": 8.51872399445215e-06, "loss": 0.0945, "step": 2146 }, { "epoch": 1.487357118115691, "grad_norm": 0.7477225065231323, "learning_rate": 8.518030513176145e-06, "loss": 0.0738, "step": 2147 }, { "epoch": 1.488049878766886, "grad_norm": 1.0097402334213257, "learning_rate": 8.517337031900138e-06, "loss": 0.104, "step": 2148 }, { "epoch": 1.488742639418081, "grad_norm": 0.927609384059906, "learning_rate": 8.516643550624133e-06, "loss": 0.0944, "step": 2149 }, { "epoch": 1.489435400069276, "grad_norm": 0.7638230323791504, "learning_rate": 8.515950069348128e-06, "loss": 0.0806, "step": 2150 }, { "epoch": 1.490128160720471, "grad_norm": 1.0538862943649292, "learning_rate": 8.515256588072123e-06, "loss": 0.1002, "step": 2151 }, { "epoch": 1.490820921371666, "grad_norm": 0.807037889957428, "learning_rate": 8.514563106796118e-06, "loss": 0.0926, "step": 2152 }, { "epoch": 1.4915136820228612, "grad_norm": 0.7541260123252869, "learning_rate": 8.513869625520111e-06, "loss": 0.0893, "step": 2153 }, { "epoch": 1.492206442674056, "grad_norm": 0.783531904220581, "learning_rate": 8.513176144244106e-06, "loss": 0.0913, "step": 2154 }, { "epoch": 1.492899203325251, "grad_norm": 0.8726778030395508, "learning_rate": 8.5124826629681e-06, "loss": 0.093, "step": 2155 }, { "epoch": 1.4935919639764461, "grad_norm": 0.9442015290260315, "learning_rate": 8.511789181692095e-06, "loss": 0.1027, "step": 2156 }, { "epoch": 1.4942847246276412, "grad_norm": 0.8207205533981323, "learning_rate": 8.51109570041609e-06, "loss": 0.0933, "step": 2157 }, { "epoch": 1.4949774852788362, "grad_norm": 0.8412635922431946, "learning_rate": 8.510402219140084e-06, "loss": 0.0957, "step": 2158 }, { "epoch": 1.495670245930031, "grad_norm": 0.9047576189041138, "learning_rate": 8.50970873786408e-06, "loss": 0.0998, "step": 2159 }, { "epoch": 1.4963630065812261, "grad_norm": 0.8066442012786865, "learning_rate": 8.509015256588073e-06, "loss": 0.0849, "step": 2160 }, { "epoch": 1.4970557672324212, "grad_norm": 0.8611944317817688, "learning_rate": 8.508321775312068e-06, "loss": 0.0895, "step": 2161 }, { "epoch": 1.4977485278836162, "grad_norm": 0.9365400075912476, "learning_rate": 8.507628294036062e-06, "loss": 0.0908, "step": 2162 }, { "epoch": 1.4984412885348113, "grad_norm": 0.8359665274620056, "learning_rate": 8.506934812760056e-06, "loss": 0.0953, "step": 2163 }, { "epoch": 1.4991340491860061, "grad_norm": 0.806018054485321, "learning_rate": 8.50624133148405e-06, "loss": 0.0843, "step": 2164 }, { "epoch": 1.4998268098372012, "grad_norm": 0.8166291117668152, "learning_rate": 8.505547850208044e-06, "loss": 0.0807, "step": 2165 }, { "epoch": 1.5005195704883962, "grad_norm": 0.7994527220726013, "learning_rate": 8.504854368932039e-06, "loss": 0.0919, "step": 2166 }, { "epoch": 1.5012123311395913, "grad_norm": 0.8588024377822876, "learning_rate": 8.504160887656034e-06, "loss": 0.1015, "step": 2167 }, { "epoch": 1.5019050917907864, "grad_norm": 0.8852903842926025, "learning_rate": 8.503467406380029e-06, "loss": 0.1108, "step": 2168 }, { "epoch": 1.5025978524419812, "grad_norm": 0.8280705213546753, "learning_rate": 8.502773925104024e-06, "loss": 0.0786, "step": 2169 }, { "epoch": 1.5032906130931765, "grad_norm": 0.8698577880859375, "learning_rate": 8.502080443828017e-06, "loss": 0.1013, "step": 2170 }, { "epoch": 1.5039833737443713, "grad_norm": 0.960726261138916, "learning_rate": 8.501386962552012e-06, "loss": 0.0949, "step": 2171 }, { "epoch": 1.5046761343955664, "grad_norm": 0.8570393323898315, "learning_rate": 8.500693481276007e-06, "loss": 0.1036, "step": 2172 }, { "epoch": 1.5053688950467614, "grad_norm": 0.8700308203697205, "learning_rate": 8.5e-06, "loss": 0.102, "step": 2173 }, { "epoch": 1.5060616556979562, "grad_norm": 0.9036585092544556, "learning_rate": 8.499306518723995e-06, "loss": 0.0972, "step": 2174 }, { "epoch": 1.5067544163491515, "grad_norm": 0.8921233415603638, "learning_rate": 8.49861303744799e-06, "loss": 0.1123, "step": 2175 }, { "epoch": 1.5074471770003464, "grad_norm": 0.7798379063606262, "learning_rate": 8.497919556171985e-06, "loss": 0.0886, "step": 2176 }, { "epoch": 1.5081399376515414, "grad_norm": 0.8370013236999512, "learning_rate": 8.49722607489598e-06, "loss": 0.0957, "step": 2177 }, { "epoch": 1.5088326983027365, "grad_norm": 0.7789174318313599, "learning_rate": 8.496532593619973e-06, "loss": 0.0889, "step": 2178 }, { "epoch": 1.5095254589539313, "grad_norm": 0.7475576400756836, "learning_rate": 8.495839112343968e-06, "loss": 0.0911, "step": 2179 }, { "epoch": 1.5102182196051266, "grad_norm": 0.8233779072761536, "learning_rate": 8.495145631067961e-06, "loss": 0.0744, "step": 2180 }, { "epoch": 1.5109109802563214, "grad_norm": 0.7661387324333191, "learning_rate": 8.494452149791956e-06, "loss": 0.0931, "step": 2181 }, { "epoch": 1.5116037409075165, "grad_norm": 0.7867785096168518, "learning_rate": 8.493758668515951e-06, "loss": 0.0901, "step": 2182 }, { "epoch": 1.5122965015587115, "grad_norm": 0.8294643759727478, "learning_rate": 8.493065187239944e-06, "loss": 0.1132, "step": 2183 }, { "epoch": 1.5129892622099064, "grad_norm": 0.8717425465583801, "learning_rate": 8.492371705963939e-06, "loss": 0.0987, "step": 2184 }, { "epoch": 1.5136820228611017, "grad_norm": 0.8166452646255493, "learning_rate": 8.491678224687934e-06, "loss": 0.0901, "step": 2185 }, { "epoch": 1.5143747835122965, "grad_norm": 0.8640490770339966, "learning_rate": 8.490984743411929e-06, "loss": 0.1201, "step": 2186 }, { "epoch": 1.5150675441634915, "grad_norm": 0.8363556861877441, "learning_rate": 8.490291262135924e-06, "loss": 0.1068, "step": 2187 }, { "epoch": 1.5157603048146866, "grad_norm": 0.8939452171325684, "learning_rate": 8.489597780859917e-06, "loss": 0.0878, "step": 2188 }, { "epoch": 1.5164530654658814, "grad_norm": 0.9297623634338379, "learning_rate": 8.488904299583912e-06, "loss": 0.0937, "step": 2189 }, { "epoch": 1.5171458261170767, "grad_norm": 0.8661013841629028, "learning_rate": 8.488210818307905e-06, "loss": 0.0862, "step": 2190 }, { "epoch": 1.5178385867682715, "grad_norm": 0.9533841013908386, "learning_rate": 8.4875173370319e-06, "loss": 0.1266, "step": 2191 }, { "epoch": 1.5185313474194666, "grad_norm": 0.873518705368042, "learning_rate": 8.486823855755895e-06, "loss": 0.0928, "step": 2192 }, { "epoch": 1.5192241080706617, "grad_norm": 0.8036083579063416, "learning_rate": 8.48613037447989e-06, "loss": 0.093, "step": 2193 }, { "epoch": 1.5199168687218565, "grad_norm": 0.8228848576545715, "learning_rate": 8.485436893203885e-06, "loss": 0.0908, "step": 2194 }, { "epoch": 1.5206096293730518, "grad_norm": 0.8991081118583679, "learning_rate": 8.484743411927878e-06, "loss": 0.1134, "step": 2195 }, { "epoch": 1.5213023900242466, "grad_norm": 0.891331136226654, "learning_rate": 8.484049930651873e-06, "loss": 0.1266, "step": 2196 }, { "epoch": 1.5219951506754417, "grad_norm": 0.7529364228248596, "learning_rate": 8.483356449375868e-06, "loss": 0.0813, "step": 2197 }, { "epoch": 1.5226879113266367, "grad_norm": 0.774604320526123, "learning_rate": 8.482662968099861e-06, "loss": 0.1056, "step": 2198 }, { "epoch": 1.5233806719778316, "grad_norm": 0.9285426139831543, "learning_rate": 8.481969486823856e-06, "loss": 0.0923, "step": 2199 }, { "epoch": 1.5240734326290268, "grad_norm": 0.866604208946228, "learning_rate": 8.481276005547851e-06, "loss": 0.1077, "step": 2200 }, { "epoch": 1.5247661932802217, "grad_norm": 0.8425531983375549, "learning_rate": 8.480582524271845e-06, "loss": 0.0961, "step": 2201 }, { "epoch": 1.5254589539314167, "grad_norm": 0.7247449159622192, "learning_rate": 8.47988904299584e-06, "loss": 0.0833, "step": 2202 }, { "epoch": 1.5261517145826118, "grad_norm": 0.7726675868034363, "learning_rate": 8.479195561719834e-06, "loss": 0.0886, "step": 2203 }, { "epoch": 1.5268444752338066, "grad_norm": 0.7877854704856873, "learning_rate": 8.47850208044383e-06, "loss": 0.0782, "step": 2204 }, { "epoch": 1.527537235885002, "grad_norm": 0.8992438912391663, "learning_rate": 8.477808599167823e-06, "loss": 0.0864, "step": 2205 }, { "epoch": 1.5282299965361967, "grad_norm": 0.8800943493843079, "learning_rate": 8.477115117891817e-06, "loss": 0.0785, "step": 2206 }, { "epoch": 1.5289227571873918, "grad_norm": 0.7755690813064575, "learning_rate": 8.476421636615812e-06, "loss": 0.0819, "step": 2207 }, { "epoch": 1.5296155178385868, "grad_norm": 0.7317102551460266, "learning_rate": 8.475728155339806e-06, "loss": 0.0843, "step": 2208 }, { "epoch": 1.5303082784897817, "grad_norm": 0.6747839450836182, "learning_rate": 8.4750346740638e-06, "loss": 0.0643, "step": 2209 }, { "epoch": 1.5310010391409767, "grad_norm": 0.789362907409668, "learning_rate": 8.474341192787796e-06, "loss": 0.0965, "step": 2210 }, { "epoch": 1.5316937997921718, "grad_norm": 0.8516563773155212, "learning_rate": 8.47364771151179e-06, "loss": 0.1064, "step": 2211 }, { "epoch": 1.5323865604433669, "grad_norm": 1.3988782167434692, "learning_rate": 8.472954230235785e-06, "loss": 0.0986, "step": 2212 }, { "epoch": 1.533079321094562, "grad_norm": 0.8765525817871094, "learning_rate": 8.472260748959779e-06, "loss": 0.1131, "step": 2213 }, { "epoch": 1.5337720817457567, "grad_norm": 0.8611074686050415, "learning_rate": 8.471567267683774e-06, "loss": 0.0941, "step": 2214 }, { "epoch": 1.5344648423969518, "grad_norm": 0.8504629135131836, "learning_rate": 8.470873786407767e-06, "loss": 0.1056, "step": 2215 }, { "epoch": 1.5351576030481469, "grad_norm": 0.8550898432731628, "learning_rate": 8.470180305131762e-06, "loss": 0.1006, "step": 2216 }, { "epoch": 1.535850363699342, "grad_norm": 0.8239811062812805, "learning_rate": 8.469486823855757e-06, "loss": 0.081, "step": 2217 }, { "epoch": 1.536543124350537, "grad_norm": 0.8953526616096497, "learning_rate": 8.468793342579752e-06, "loss": 0.114, "step": 2218 }, { "epoch": 1.5372358850017318, "grad_norm": 0.8694466948509216, "learning_rate": 8.468099861303747e-06, "loss": 0.0892, "step": 2219 }, { "epoch": 1.5379286456529269, "grad_norm": 0.9421346783638, "learning_rate": 8.46740638002774e-06, "loss": 0.1106, "step": 2220 }, { "epoch": 1.538621406304122, "grad_norm": 0.8330638408660889, "learning_rate": 8.466712898751735e-06, "loss": 0.1088, "step": 2221 }, { "epoch": 1.539314166955317, "grad_norm": 0.7530385851860046, "learning_rate": 8.46601941747573e-06, "loss": 0.0777, "step": 2222 }, { "epoch": 1.540006927606512, "grad_norm": 0.7912553548812866, "learning_rate": 8.465325936199723e-06, "loss": 0.0788, "step": 2223 }, { "epoch": 1.5406996882577069, "grad_norm": 0.9006667733192444, "learning_rate": 8.464632454923718e-06, "loss": 0.115, "step": 2224 }, { "epoch": 1.541392448908902, "grad_norm": 0.8519740104675293, "learning_rate": 8.463938973647711e-06, "loss": 0.0983, "step": 2225 }, { "epoch": 1.542085209560097, "grad_norm": 0.9370810389518738, "learning_rate": 8.463245492371706e-06, "loss": 0.1081, "step": 2226 }, { "epoch": 1.542777970211292, "grad_norm": 0.8160594701766968, "learning_rate": 8.462552011095701e-06, "loss": 0.0985, "step": 2227 }, { "epoch": 1.543470730862487, "grad_norm": 0.8738390803337097, "learning_rate": 8.461858529819696e-06, "loss": 0.1103, "step": 2228 }, { "epoch": 1.544163491513682, "grad_norm": 0.8068435192108154, "learning_rate": 8.46116504854369e-06, "loss": 0.0806, "step": 2229 }, { "epoch": 1.544856252164877, "grad_norm": 0.9434190988540649, "learning_rate": 8.460471567267684e-06, "loss": 0.0955, "step": 2230 }, { "epoch": 1.545549012816072, "grad_norm": 0.8715764284133911, "learning_rate": 8.459778085991679e-06, "loss": 0.0864, "step": 2231 }, { "epoch": 1.546241773467267, "grad_norm": 0.7127259969711304, "learning_rate": 8.459084604715674e-06, "loss": 0.0829, "step": 2232 }, { "epoch": 1.5469345341184622, "grad_norm": 0.798002302646637, "learning_rate": 8.458391123439667e-06, "loss": 0.0826, "step": 2233 }, { "epoch": 1.547627294769657, "grad_norm": 0.7845522165298462, "learning_rate": 8.457697642163662e-06, "loss": 0.0838, "step": 2234 }, { "epoch": 1.548320055420852, "grad_norm": 0.8004646301269531, "learning_rate": 8.457004160887657e-06, "loss": 0.0846, "step": 2235 }, { "epoch": 1.549012816072047, "grad_norm": 0.8239243626594543, "learning_rate": 8.456310679611652e-06, "loss": 0.0949, "step": 2236 }, { "epoch": 1.5497055767232422, "grad_norm": 0.785351037979126, "learning_rate": 8.455617198335647e-06, "loss": 0.0776, "step": 2237 }, { "epoch": 1.5503983373744372, "grad_norm": 0.7641162276268005, "learning_rate": 8.45492371705964e-06, "loss": 0.0743, "step": 2238 }, { "epoch": 1.551091098025632, "grad_norm": 0.8055779337882996, "learning_rate": 8.454230235783635e-06, "loss": 0.0779, "step": 2239 }, { "epoch": 1.551783858676827, "grad_norm": 0.806489109992981, "learning_rate": 8.453536754507628e-06, "loss": 0.0813, "step": 2240 }, { "epoch": 1.5524766193280222, "grad_norm": 0.8963631391525269, "learning_rate": 8.452843273231623e-06, "loss": 0.0991, "step": 2241 }, { "epoch": 1.5531693799792172, "grad_norm": 0.8540571928024292, "learning_rate": 8.452149791955618e-06, "loss": 0.0984, "step": 2242 }, { "epoch": 1.5538621406304123, "grad_norm": 0.7815813422203064, "learning_rate": 8.451456310679611e-06, "loss": 0.0863, "step": 2243 }, { "epoch": 1.5545549012816071, "grad_norm": 0.8563543558120728, "learning_rate": 8.450762829403606e-06, "loss": 0.0894, "step": 2244 }, { "epoch": 1.5552476619328022, "grad_norm": 0.7765095233917236, "learning_rate": 8.450069348127601e-06, "loss": 0.078, "step": 2245 }, { "epoch": 1.5559404225839972, "grad_norm": 0.7816537022590637, "learning_rate": 8.449375866851596e-06, "loss": 0.0856, "step": 2246 }, { "epoch": 1.5566331832351923, "grad_norm": 0.9687926173210144, "learning_rate": 8.448682385575591e-06, "loss": 0.085, "step": 2247 }, { "epoch": 1.5573259438863873, "grad_norm": 0.9634583592414856, "learning_rate": 8.447988904299584e-06, "loss": 0.1021, "step": 2248 }, { "epoch": 1.5580187045375822, "grad_norm": 0.8024788498878479, "learning_rate": 8.44729542302358e-06, "loss": 0.0829, "step": 2249 }, { "epoch": 1.5587114651887772, "grad_norm": 0.77815181016922, "learning_rate": 8.446601941747573e-06, "loss": 0.0855, "step": 2250 }, { "epoch": 1.5594042258399723, "grad_norm": 1.0114446878433228, "learning_rate": 8.445908460471567e-06, "loss": 0.113, "step": 2251 }, { "epoch": 1.5600969864911673, "grad_norm": 0.8147749304771423, "learning_rate": 8.445214979195562e-06, "loss": 0.0855, "step": 2252 }, { "epoch": 1.5607897471423624, "grad_norm": 0.8138399124145508, "learning_rate": 8.444521497919557e-06, "loss": 0.088, "step": 2253 }, { "epoch": 1.5614825077935572, "grad_norm": 0.8528136610984802, "learning_rate": 8.443828016643552e-06, "loss": 0.0915, "step": 2254 }, { "epoch": 1.5621752684447523, "grad_norm": 0.7786431312561035, "learning_rate": 8.443134535367546e-06, "loss": 0.085, "step": 2255 }, { "epoch": 1.5628680290959474, "grad_norm": 0.8430132865905762, "learning_rate": 8.44244105409154e-06, "loss": 0.0956, "step": 2256 }, { "epoch": 1.5635607897471424, "grad_norm": 0.8963088393211365, "learning_rate": 8.441747572815535e-06, "loss": 0.0866, "step": 2257 }, { "epoch": 1.5642535503983375, "grad_norm": 0.7786991596221924, "learning_rate": 8.441054091539529e-06, "loss": 0.0851, "step": 2258 }, { "epoch": 1.5649463110495323, "grad_norm": 0.939453125, "learning_rate": 8.440360610263524e-06, "loss": 0.1006, "step": 2259 }, { "epoch": 1.5656390717007274, "grad_norm": 0.7245839834213257, "learning_rate": 8.439667128987517e-06, "loss": 0.085, "step": 2260 }, { "epoch": 1.5663318323519224, "grad_norm": 0.9496403336524963, "learning_rate": 8.438973647711512e-06, "loss": 0.1069, "step": 2261 }, { "epoch": 1.5670245930031175, "grad_norm": 0.8363348245620728, "learning_rate": 8.438280166435507e-06, "loss": 0.0969, "step": 2262 }, { "epoch": 1.5677173536543125, "grad_norm": 0.8518447279930115, "learning_rate": 8.437586685159502e-06, "loss": 0.0822, "step": 2263 }, { "epoch": 1.5684101143055074, "grad_norm": 0.8098133206367493, "learning_rate": 8.436893203883497e-06, "loss": 0.0888, "step": 2264 }, { "epoch": 1.5691028749567024, "grad_norm": 0.8080687522888184, "learning_rate": 8.43619972260749e-06, "loss": 0.0876, "step": 2265 }, { "epoch": 1.5697956356078975, "grad_norm": 1.146905779838562, "learning_rate": 8.435506241331485e-06, "loss": 0.1192, "step": 2266 }, { "epoch": 1.5704883962590925, "grad_norm": 0.7518221735954285, "learning_rate": 8.43481276005548e-06, "loss": 0.0809, "step": 2267 }, { "epoch": 1.5711811569102876, "grad_norm": 0.8670949935913086, "learning_rate": 8.434119278779473e-06, "loss": 0.0992, "step": 2268 }, { "epoch": 1.5718739175614824, "grad_norm": 0.8235729336738586, "learning_rate": 8.433425797503468e-06, "loss": 0.0958, "step": 2269 }, { "epoch": 1.5725666782126775, "grad_norm": 0.8344941735267639, "learning_rate": 8.432732316227463e-06, "loss": 0.101, "step": 2270 }, { "epoch": 1.5732594388638725, "grad_norm": 0.8182787895202637, "learning_rate": 8.432038834951458e-06, "loss": 0.1032, "step": 2271 }, { "epoch": 1.5739521995150676, "grad_norm": 0.9052494168281555, "learning_rate": 8.431345353675453e-06, "loss": 0.0842, "step": 2272 }, { "epoch": 1.5746449601662627, "grad_norm": 0.8101468682289124, "learning_rate": 8.430651872399446e-06, "loss": 0.0897, "step": 2273 }, { "epoch": 1.5753377208174575, "grad_norm": 0.7904663681983948, "learning_rate": 8.42995839112344e-06, "loss": 0.0918, "step": 2274 }, { "epoch": 1.5760304814686525, "grad_norm": 0.9042665958404541, "learning_rate": 8.429264909847434e-06, "loss": 0.0922, "step": 2275 }, { "epoch": 1.5767232421198476, "grad_norm": 0.8572434782981873, "learning_rate": 8.428571428571429e-06, "loss": 0.0952, "step": 2276 }, { "epoch": 1.5774160027710427, "grad_norm": 0.7563155293464661, "learning_rate": 8.427877947295424e-06, "loss": 0.0838, "step": 2277 }, { "epoch": 1.5781087634222377, "grad_norm": 0.7798501253128052, "learning_rate": 8.427184466019419e-06, "loss": 0.0838, "step": 2278 }, { "epoch": 1.5788015240734325, "grad_norm": 0.901751697063446, "learning_rate": 8.426490984743412e-06, "loss": 0.117, "step": 2279 }, { "epoch": 1.5794942847246276, "grad_norm": 0.9761192798614502, "learning_rate": 8.425797503467407e-06, "loss": 0.0993, "step": 2280 }, { "epoch": 1.5801870453758227, "grad_norm": 1.0279780626296997, "learning_rate": 8.425104022191402e-06, "loss": 0.141, "step": 2281 }, { "epoch": 1.5808798060270177, "grad_norm": 0.848374605178833, "learning_rate": 8.424410540915397e-06, "loss": 0.0901, "step": 2282 }, { "epoch": 1.5815725666782128, "grad_norm": 0.7858184576034546, "learning_rate": 8.42371705963939e-06, "loss": 0.0867, "step": 2283 }, { "epoch": 1.5822653273294076, "grad_norm": 0.8985626101493835, "learning_rate": 8.423023578363385e-06, "loss": 0.1012, "step": 2284 }, { "epoch": 1.5829580879806027, "grad_norm": 0.9050174355506897, "learning_rate": 8.422330097087378e-06, "loss": 0.0774, "step": 2285 }, { "epoch": 1.5836508486317977, "grad_norm": 0.7872288823127747, "learning_rate": 8.421636615811373e-06, "loss": 0.0825, "step": 2286 }, { "epoch": 1.5843436092829926, "grad_norm": 0.862546980381012, "learning_rate": 8.420943134535368e-06, "loss": 0.1098, "step": 2287 }, { "epoch": 1.5850363699341878, "grad_norm": 0.8242610096931458, "learning_rate": 8.420249653259363e-06, "loss": 0.0962, "step": 2288 }, { "epoch": 1.5857291305853827, "grad_norm": 0.9755899310112, "learning_rate": 8.419556171983358e-06, "loss": 0.1015, "step": 2289 }, { "epoch": 1.5864218912365777, "grad_norm": 1.0072556734085083, "learning_rate": 8.418862690707351e-06, "loss": 0.1444, "step": 2290 }, { "epoch": 1.5871146518877728, "grad_norm": 0.980300784111023, "learning_rate": 8.418169209431346e-06, "loss": 0.1126, "step": 2291 }, { "epoch": 1.5878074125389676, "grad_norm": 0.770821750164032, "learning_rate": 8.417475728155341e-06, "loss": 0.0851, "step": 2292 }, { "epoch": 1.588500173190163, "grad_norm": 0.8465372323989868, "learning_rate": 8.416782246879334e-06, "loss": 0.0993, "step": 2293 }, { "epoch": 1.5891929338413577, "grad_norm": 0.7881616353988647, "learning_rate": 8.41608876560333e-06, "loss": 0.0852, "step": 2294 }, { "epoch": 1.5898856944925528, "grad_norm": 0.8990263342857361, "learning_rate": 8.415395284327324e-06, "loss": 0.1093, "step": 2295 }, { "epoch": 1.5905784551437478, "grad_norm": 0.8176400065422058, "learning_rate": 8.414701803051319e-06, "loss": 0.0919, "step": 2296 }, { "epoch": 1.5912712157949427, "grad_norm": 0.831199049949646, "learning_rate": 8.414008321775314e-06, "loss": 0.0935, "step": 2297 }, { "epoch": 1.591963976446138, "grad_norm": 0.9600399732589722, "learning_rate": 8.413314840499307e-06, "loss": 0.1248, "step": 2298 }, { "epoch": 1.5926567370973328, "grad_norm": 0.8325619697570801, "learning_rate": 8.412621359223302e-06, "loss": 0.0868, "step": 2299 }, { "epoch": 1.5933494977485279, "grad_norm": 0.7649892568588257, "learning_rate": 8.411927877947295e-06, "loss": 0.0913, "step": 2300 }, { "epoch": 1.594042258399723, "grad_norm": 1.0146855115890503, "learning_rate": 8.41123439667129e-06, "loss": 0.0886, "step": 2301 }, { "epoch": 1.5947350190509177, "grad_norm": 0.775775134563446, "learning_rate": 8.410540915395285e-06, "loss": 0.0824, "step": 2302 }, { "epoch": 1.595427779702113, "grad_norm": 0.830560564994812, "learning_rate": 8.409847434119279e-06, "loss": 0.0918, "step": 2303 }, { "epoch": 1.5961205403533079, "grad_norm": 0.7563770413398743, "learning_rate": 8.409153952843274e-06, "loss": 0.083, "step": 2304 }, { "epoch": 1.596813301004503, "grad_norm": 0.8576757311820984, "learning_rate": 8.408460471567268e-06, "loss": 0.0931, "step": 2305 }, { "epoch": 1.597506061655698, "grad_norm": 0.8318626880645752, "learning_rate": 8.407766990291263e-06, "loss": 0.0818, "step": 2306 }, { "epoch": 1.5981988223068928, "grad_norm": 0.8872581124305725, "learning_rate": 8.407073509015258e-06, "loss": 0.117, "step": 2307 }, { "epoch": 1.598891582958088, "grad_norm": 0.8937380313873291, "learning_rate": 8.406380027739252e-06, "loss": 0.1128, "step": 2308 }, { "epoch": 1.599584343609283, "grad_norm": 0.7964592576026917, "learning_rate": 8.405686546463247e-06, "loss": 0.0863, "step": 2309 }, { "epoch": 1.600277104260478, "grad_norm": 0.7582256197929382, "learning_rate": 8.40499306518724e-06, "loss": 0.0784, "step": 2310 }, { "epoch": 1.600969864911673, "grad_norm": 0.7886786460876465, "learning_rate": 8.404299583911235e-06, "loss": 0.0917, "step": 2311 }, { "epoch": 1.6016626255628679, "grad_norm": 0.9329751133918762, "learning_rate": 8.40360610263523e-06, "loss": 0.1231, "step": 2312 }, { "epoch": 1.6023553862140631, "grad_norm": 0.8119995594024658, "learning_rate": 8.402912621359225e-06, "loss": 0.0927, "step": 2313 }, { "epoch": 1.603048146865258, "grad_norm": 0.8827449679374695, "learning_rate": 8.40221914008322e-06, "loss": 0.096, "step": 2314 }, { "epoch": 1.603740907516453, "grad_norm": 0.8343936204910278, "learning_rate": 8.401525658807213e-06, "loss": 0.1083, "step": 2315 }, { "epoch": 1.604433668167648, "grad_norm": 0.769190788269043, "learning_rate": 8.400832177531208e-06, "loss": 0.0731, "step": 2316 }, { "epoch": 1.605126428818843, "grad_norm": 0.8587493896484375, "learning_rate": 8.400138696255203e-06, "loss": 0.1032, "step": 2317 }, { "epoch": 1.6058191894700382, "grad_norm": 0.9102193117141724, "learning_rate": 8.399445214979196e-06, "loss": 0.0979, "step": 2318 }, { "epoch": 1.606511950121233, "grad_norm": 0.7680855989456177, "learning_rate": 8.39875173370319e-06, "loss": 0.0859, "step": 2319 }, { "epoch": 1.607204710772428, "grad_norm": 0.8719523549079895, "learning_rate": 8.398058252427184e-06, "loss": 0.1093, "step": 2320 }, { "epoch": 1.6078974714236232, "grad_norm": 0.8795385956764221, "learning_rate": 8.397364771151179e-06, "loss": 0.0992, "step": 2321 }, { "epoch": 1.608590232074818, "grad_norm": 0.7637278437614441, "learning_rate": 8.396671289875174e-06, "loss": 0.0921, "step": 2322 }, { "epoch": 1.6092829927260133, "grad_norm": 0.7343862652778625, "learning_rate": 8.395977808599169e-06, "loss": 0.087, "step": 2323 }, { "epoch": 1.609975753377208, "grad_norm": 0.899121880531311, "learning_rate": 8.395284327323164e-06, "loss": 0.0987, "step": 2324 }, { "epoch": 1.6106685140284032, "grad_norm": 0.7771249413490295, "learning_rate": 8.394590846047157e-06, "loss": 0.0823, "step": 2325 }, { "epoch": 1.6113612746795982, "grad_norm": 0.8386172652244568, "learning_rate": 8.393897364771152e-06, "loss": 0.0924, "step": 2326 }, { "epoch": 1.612054035330793, "grad_norm": 0.8147438764572144, "learning_rate": 8.393203883495147e-06, "loss": 0.0955, "step": 2327 }, { "epoch": 1.6127467959819883, "grad_norm": 0.95125412940979, "learning_rate": 8.39251040221914e-06, "loss": 0.1013, "step": 2328 }, { "epoch": 1.6134395566331832, "grad_norm": 0.8926140069961548, "learning_rate": 8.391816920943135e-06, "loss": 0.1077, "step": 2329 }, { "epoch": 1.6141323172843782, "grad_norm": 0.916304349899292, "learning_rate": 8.39112343966713e-06, "loss": 0.1121, "step": 2330 }, { "epoch": 1.6148250779355733, "grad_norm": 0.8023694753646851, "learning_rate": 8.390429958391125e-06, "loss": 0.0877, "step": 2331 }, { "epoch": 1.6155178385867681, "grad_norm": 0.8459504246711731, "learning_rate": 8.38973647711512e-06, "loss": 0.0984, "step": 2332 }, { "epoch": 1.6162105992379634, "grad_norm": 0.9580438733100891, "learning_rate": 8.389042995839113e-06, "loss": 0.1066, "step": 2333 }, { "epoch": 1.6169033598891582, "grad_norm": 0.9596865773200989, "learning_rate": 8.388349514563108e-06, "loss": 0.1227, "step": 2334 }, { "epoch": 1.6175961205403533, "grad_norm": 0.86414635181427, "learning_rate": 8.387656033287101e-06, "loss": 0.1046, "step": 2335 }, { "epoch": 1.6182888811915483, "grad_norm": 0.8180360198020935, "learning_rate": 8.386962552011096e-06, "loss": 0.0839, "step": 2336 }, { "epoch": 1.6189816418427432, "grad_norm": 0.8903512358665466, "learning_rate": 8.386269070735091e-06, "loss": 0.0875, "step": 2337 }, { "epoch": 1.6196744024939385, "grad_norm": 0.7703673839569092, "learning_rate": 8.385575589459084e-06, "loss": 0.0887, "step": 2338 }, { "epoch": 1.6203671631451333, "grad_norm": 0.8291234374046326, "learning_rate": 8.38488210818308e-06, "loss": 0.0738, "step": 2339 }, { "epoch": 1.6210599237963284, "grad_norm": 0.850165069103241, "learning_rate": 8.384188626907074e-06, "loss": 0.1036, "step": 2340 }, { "epoch": 1.6217526844475234, "grad_norm": 0.7883290648460388, "learning_rate": 8.383495145631069e-06, "loss": 0.0974, "step": 2341 }, { "epoch": 1.6224454450987182, "grad_norm": 0.9966535568237305, "learning_rate": 8.382801664355064e-06, "loss": 0.1101, "step": 2342 }, { "epoch": 1.6231382057499135, "grad_norm": 0.8514889478683472, "learning_rate": 8.382108183079057e-06, "loss": 0.0961, "step": 2343 }, { "epoch": 1.6238309664011084, "grad_norm": 0.8843449950218201, "learning_rate": 8.381414701803052e-06, "loss": 0.0921, "step": 2344 }, { "epoch": 1.6245237270523034, "grad_norm": 0.8890202641487122, "learning_rate": 8.380721220527045e-06, "loss": 0.1057, "step": 2345 }, { "epoch": 1.6252164877034985, "grad_norm": 0.8342781066894531, "learning_rate": 8.38002773925104e-06, "loss": 0.1014, "step": 2346 }, { "epoch": 1.6259092483546933, "grad_norm": 0.9303826689720154, "learning_rate": 8.379334257975035e-06, "loss": 0.1357, "step": 2347 }, { "epoch": 1.6266020090058886, "grad_norm": 0.8655639290809631, "learning_rate": 8.37864077669903e-06, "loss": 0.0977, "step": 2348 }, { "epoch": 1.6272947696570834, "grad_norm": 0.9540505409240723, "learning_rate": 8.377947295423025e-06, "loss": 0.0954, "step": 2349 }, { "epoch": 1.6279875303082785, "grad_norm": 0.8128141164779663, "learning_rate": 8.377253814147018e-06, "loss": 0.0744, "step": 2350 }, { "epoch": 1.6286802909594735, "grad_norm": 0.8301307559013367, "learning_rate": 8.376560332871013e-06, "loss": 0.1025, "step": 2351 }, { "epoch": 1.6293730516106684, "grad_norm": 0.9831981658935547, "learning_rate": 8.375866851595008e-06, "loss": 0.1147, "step": 2352 }, { "epoch": 1.6300658122618636, "grad_norm": 0.8499257564544678, "learning_rate": 8.375173370319002e-06, "loss": 0.0936, "step": 2353 }, { "epoch": 1.6307585729130585, "grad_norm": 0.8565851449966431, "learning_rate": 8.374479889042996e-06, "loss": 0.0939, "step": 2354 }, { "epoch": 1.6314513335642535, "grad_norm": 0.9829320907592773, "learning_rate": 8.373786407766991e-06, "loss": 0.0887, "step": 2355 }, { "epoch": 1.6321440942154486, "grad_norm": 0.8618051409721375, "learning_rate": 8.373092926490985e-06, "loss": 0.095, "step": 2356 }, { "epoch": 1.6328368548666434, "grad_norm": 0.8647840023040771, "learning_rate": 8.37239944521498e-06, "loss": 0.0929, "step": 2357 }, { "epoch": 1.6335296155178387, "grad_norm": 0.8255338072776794, "learning_rate": 8.371705963938975e-06, "loss": 0.0896, "step": 2358 }, { "epoch": 1.6342223761690335, "grad_norm": 0.772359311580658, "learning_rate": 8.37101248266297e-06, "loss": 0.0771, "step": 2359 }, { "epoch": 1.6349151368202286, "grad_norm": 0.7591848373413086, "learning_rate": 8.370319001386963e-06, "loss": 0.086, "step": 2360 }, { "epoch": 1.6356078974714237, "grad_norm": 0.8855777978897095, "learning_rate": 8.369625520110958e-06, "loss": 0.081, "step": 2361 }, { "epoch": 1.6363006581226185, "grad_norm": 0.8855263590812683, "learning_rate": 8.368932038834953e-06, "loss": 0.0948, "step": 2362 }, { "epoch": 1.6369934187738138, "grad_norm": 0.8888722658157349, "learning_rate": 8.368238557558946e-06, "loss": 0.0886, "step": 2363 }, { "epoch": 1.6376861794250086, "grad_norm": 0.9112776517868042, "learning_rate": 8.36754507628294e-06, "loss": 0.1144, "step": 2364 }, { "epoch": 1.6383789400762037, "grad_norm": 0.8456845283508301, "learning_rate": 8.366851595006936e-06, "loss": 0.0932, "step": 2365 }, { "epoch": 1.6390717007273987, "grad_norm": 0.9201662540435791, "learning_rate": 8.36615811373093e-06, "loss": 0.0937, "step": 2366 }, { "epoch": 1.6397644613785936, "grad_norm": 0.9050766825675964, "learning_rate": 8.365464632454926e-06, "loss": 0.1163, "step": 2367 }, { "epoch": 1.6404572220297888, "grad_norm": 0.9505478739738464, "learning_rate": 8.364771151178919e-06, "loss": 0.1182, "step": 2368 }, { "epoch": 1.6411499826809837, "grad_norm": 0.8112539052963257, "learning_rate": 8.364077669902914e-06, "loss": 0.0842, "step": 2369 }, { "epoch": 1.6418427433321787, "grad_norm": 0.8253764510154724, "learning_rate": 8.363384188626907e-06, "loss": 0.0877, "step": 2370 }, { "epoch": 1.6425355039833738, "grad_norm": 0.9781371355056763, "learning_rate": 8.362690707350902e-06, "loss": 0.1425, "step": 2371 }, { "epoch": 1.6432282646345686, "grad_norm": 0.917624831199646, "learning_rate": 8.361997226074897e-06, "loss": 0.0885, "step": 2372 }, { "epoch": 1.643921025285764, "grad_norm": 0.817848801612854, "learning_rate": 8.361303744798892e-06, "loss": 0.1102, "step": 2373 }, { "epoch": 1.6446137859369587, "grad_norm": 0.9392564296722412, "learning_rate": 8.360610263522887e-06, "loss": 0.1327, "step": 2374 }, { "epoch": 1.6453065465881538, "grad_norm": 0.9744406342506409, "learning_rate": 8.35991678224688e-06, "loss": 0.1217, "step": 2375 }, { "epoch": 1.6459993072393488, "grad_norm": 0.8594071865081787, "learning_rate": 8.359223300970875e-06, "loss": 0.1105, "step": 2376 }, { "epoch": 1.6466920678905437, "grad_norm": 0.7380807399749756, "learning_rate": 8.35852981969487e-06, "loss": 0.0929, "step": 2377 }, { "epoch": 1.647384828541739, "grad_norm": 0.8635945320129395, "learning_rate": 8.357836338418863e-06, "loss": 0.1011, "step": 2378 }, { "epoch": 1.6480775891929338, "grad_norm": 0.9151350259780884, "learning_rate": 8.357142857142858e-06, "loss": 0.1154, "step": 2379 }, { "epoch": 1.6487703498441288, "grad_norm": 0.8731752038002014, "learning_rate": 8.356449375866851e-06, "loss": 0.1034, "step": 2380 }, { "epoch": 1.649463110495324, "grad_norm": 0.9005547165870667, "learning_rate": 8.355755894590846e-06, "loss": 0.0866, "step": 2381 }, { "epoch": 1.6501558711465187, "grad_norm": 0.8569620251655579, "learning_rate": 8.355062413314841e-06, "loss": 0.0912, "step": 2382 }, { "epoch": 1.650848631797714, "grad_norm": 0.8079420924186707, "learning_rate": 8.354368932038836e-06, "loss": 0.0894, "step": 2383 }, { "epoch": 1.6515413924489089, "grad_norm": 0.9063421487808228, "learning_rate": 8.353675450762831e-06, "loss": 0.0893, "step": 2384 }, { "epoch": 1.652234153100104, "grad_norm": 0.8574304580688477, "learning_rate": 8.352981969486824e-06, "loss": 0.0971, "step": 2385 }, { "epoch": 1.652926913751299, "grad_norm": 0.8941143751144409, "learning_rate": 8.352288488210819e-06, "loss": 0.1124, "step": 2386 }, { "epoch": 1.6536196744024938, "grad_norm": 0.8861686587333679, "learning_rate": 8.351595006934814e-06, "loss": 0.0916, "step": 2387 }, { "epoch": 1.654312435053689, "grad_norm": 0.8929340243339539, "learning_rate": 8.350901525658807e-06, "loss": 0.1187, "step": 2388 }, { "epoch": 1.655005195704884, "grad_norm": 0.8803771138191223, "learning_rate": 8.350208044382802e-06, "loss": 0.0893, "step": 2389 }, { "epoch": 1.655697956356079, "grad_norm": 0.8754448294639587, "learning_rate": 8.349514563106797e-06, "loss": 0.0779, "step": 2390 }, { "epoch": 1.656390717007274, "grad_norm": 0.7831154465675354, "learning_rate": 8.348821081830792e-06, "loss": 0.0974, "step": 2391 }, { "epoch": 1.6570834776584689, "grad_norm": 0.8255621790885925, "learning_rate": 8.348127600554787e-06, "loss": 0.0894, "step": 2392 }, { "epoch": 1.6577762383096641, "grad_norm": 0.8432981371879578, "learning_rate": 8.34743411927878e-06, "loss": 0.0946, "step": 2393 }, { "epoch": 1.658468998960859, "grad_norm": 0.8284807801246643, "learning_rate": 8.346740638002775e-06, "loss": 0.0788, "step": 2394 }, { "epoch": 1.659161759612054, "grad_norm": 0.8855732679367065, "learning_rate": 8.346047156726768e-06, "loss": 0.0872, "step": 2395 }, { "epoch": 1.659854520263249, "grad_norm": 0.8978323936462402, "learning_rate": 8.345353675450763e-06, "loss": 0.1052, "step": 2396 }, { "epoch": 1.660547280914444, "grad_norm": 0.8796271681785583, "learning_rate": 8.344660194174758e-06, "loss": 0.1155, "step": 2397 }, { "epoch": 1.6612400415656392, "grad_norm": 0.7829756736755371, "learning_rate": 8.343966712898752e-06, "loss": 0.0842, "step": 2398 }, { "epoch": 1.661932802216834, "grad_norm": 0.8052772879600525, "learning_rate": 8.343273231622746e-06, "loss": 0.0867, "step": 2399 }, { "epoch": 1.662625562868029, "grad_norm": 0.7163562774658203, "learning_rate": 8.342579750346741e-06, "loss": 0.0817, "step": 2400 }, { "epoch": 1.6633183235192242, "grad_norm": 0.8346788287162781, "learning_rate": 8.341886269070736e-06, "loss": 0.1039, "step": 2401 }, { "epoch": 1.664011084170419, "grad_norm": 0.8440046310424805, "learning_rate": 8.341192787794731e-06, "loss": 0.0859, "step": 2402 }, { "epoch": 1.6647038448216143, "grad_norm": 0.8900326490402222, "learning_rate": 8.340499306518725e-06, "loss": 0.1011, "step": 2403 }, { "epoch": 1.665396605472809, "grad_norm": 0.7580977082252502, "learning_rate": 8.33980582524272e-06, "loss": 0.0772, "step": 2404 }, { "epoch": 1.6660893661240042, "grad_norm": 0.8428370356559753, "learning_rate": 8.339112343966713e-06, "loss": 0.09, "step": 2405 }, { "epoch": 1.6667821267751992, "grad_norm": 0.7755301594734192, "learning_rate": 8.338418862690708e-06, "loss": 0.095, "step": 2406 }, { "epoch": 1.667474887426394, "grad_norm": 0.7946776747703552, "learning_rate": 8.337725381414703e-06, "loss": 0.0922, "step": 2407 }, { "epoch": 1.6681676480775893, "grad_norm": 0.7756094932556152, "learning_rate": 8.337031900138697e-06, "loss": 0.0895, "step": 2408 }, { "epoch": 1.6688604087287842, "grad_norm": 0.8539913892745972, "learning_rate": 8.336338418862692e-06, "loss": 0.0982, "step": 2409 }, { "epoch": 1.6695531693799792, "grad_norm": 0.7312172651290894, "learning_rate": 8.335644937586686e-06, "loss": 0.0801, "step": 2410 }, { "epoch": 1.6702459300311743, "grad_norm": 0.8065546751022339, "learning_rate": 8.33495145631068e-06, "loss": 0.0941, "step": 2411 }, { "epoch": 1.670938690682369, "grad_norm": 0.7429964542388916, "learning_rate": 8.334257975034676e-06, "loss": 0.0828, "step": 2412 }, { "epoch": 1.6716314513335644, "grad_norm": 0.8563544154167175, "learning_rate": 8.333564493758669e-06, "loss": 0.095, "step": 2413 }, { "epoch": 1.6723242119847592, "grad_norm": 0.8839187026023865, "learning_rate": 8.332871012482664e-06, "loss": 0.1261, "step": 2414 }, { "epoch": 1.6730169726359543, "grad_norm": 0.9011917114257812, "learning_rate": 8.332177531206657e-06, "loss": 0.0873, "step": 2415 }, { "epoch": 1.6737097332871493, "grad_norm": 0.8669494390487671, "learning_rate": 8.331484049930652e-06, "loss": 0.113, "step": 2416 }, { "epoch": 1.6744024939383442, "grad_norm": 0.8910118341445923, "learning_rate": 8.330790568654647e-06, "loss": 0.0887, "step": 2417 }, { "epoch": 1.6750952545895395, "grad_norm": 0.9729399085044861, "learning_rate": 8.330097087378642e-06, "loss": 0.0961, "step": 2418 }, { "epoch": 1.6757880152407343, "grad_norm": 0.954855740070343, "learning_rate": 8.329403606102637e-06, "loss": 0.1121, "step": 2419 }, { "epoch": 1.6764807758919293, "grad_norm": 0.8312874436378479, "learning_rate": 8.32871012482663e-06, "loss": 0.097, "step": 2420 }, { "epoch": 1.6771735365431244, "grad_norm": 0.8347145915031433, "learning_rate": 8.328016643550625e-06, "loss": 0.0895, "step": 2421 }, { "epoch": 1.6778662971943192, "grad_norm": 0.8984479904174805, "learning_rate": 8.32732316227462e-06, "loss": 0.0936, "step": 2422 }, { "epoch": 1.6785590578455145, "grad_norm": 0.7961512804031372, "learning_rate": 8.326629680998613e-06, "loss": 0.0836, "step": 2423 }, { "epoch": 1.6792518184967093, "grad_norm": 0.9486488699913025, "learning_rate": 8.325936199722608e-06, "loss": 0.0976, "step": 2424 }, { "epoch": 1.6799445791479044, "grad_norm": 0.8769304156303406, "learning_rate": 8.325242718446603e-06, "loss": 0.1022, "step": 2425 }, { "epoch": 1.6806373397990995, "grad_norm": 0.8644242882728577, "learning_rate": 8.324549237170598e-06, "loss": 0.0875, "step": 2426 }, { "epoch": 1.6813301004502943, "grad_norm": 0.8403621912002563, "learning_rate": 8.323855755894593e-06, "loss": 0.0991, "step": 2427 }, { "epoch": 1.6820228611014896, "grad_norm": 0.8801661729812622, "learning_rate": 8.323162274618586e-06, "loss": 0.104, "step": 2428 }, { "epoch": 1.6827156217526844, "grad_norm": 0.7376656532287598, "learning_rate": 8.322468793342581e-06, "loss": 0.0849, "step": 2429 }, { "epoch": 1.6834083824038795, "grad_norm": 0.7886162996292114, "learning_rate": 8.321775312066574e-06, "loss": 0.0817, "step": 2430 }, { "epoch": 1.6841011430550745, "grad_norm": 0.7826414704322815, "learning_rate": 8.321081830790569e-06, "loss": 0.0939, "step": 2431 }, { "epoch": 1.6847939037062694, "grad_norm": 0.9302990436553955, "learning_rate": 8.320388349514564e-06, "loss": 0.1199, "step": 2432 }, { "epoch": 1.6854866643574646, "grad_norm": 0.87852942943573, "learning_rate": 8.319694868238557e-06, "loss": 0.0981, "step": 2433 }, { "epoch": 1.6861794250086595, "grad_norm": 0.8910174369812012, "learning_rate": 8.319001386962552e-06, "loss": 0.0973, "step": 2434 }, { "epoch": 1.6868721856598545, "grad_norm": 0.8218281269073486, "learning_rate": 8.318307905686547e-06, "loss": 0.0882, "step": 2435 }, { "epoch": 1.6875649463110496, "grad_norm": 0.8782800436019897, "learning_rate": 8.317614424410542e-06, "loss": 0.1105, "step": 2436 }, { "epoch": 1.6882577069622444, "grad_norm": 0.7855113744735718, "learning_rate": 8.316920943134537e-06, "loss": 0.1052, "step": 2437 }, { "epoch": 1.6889504676134397, "grad_norm": 0.8524637222290039, "learning_rate": 8.31622746185853e-06, "loss": 0.1091, "step": 2438 }, { "epoch": 1.6896432282646345, "grad_norm": 0.7963051199913025, "learning_rate": 8.315533980582525e-06, "loss": 0.0945, "step": 2439 }, { "epoch": 1.6903359889158296, "grad_norm": 0.8323280811309814, "learning_rate": 8.314840499306518e-06, "loss": 0.0946, "step": 2440 }, { "epoch": 1.6910287495670246, "grad_norm": 0.8163182139396667, "learning_rate": 8.314147018030513e-06, "loss": 0.0867, "step": 2441 }, { "epoch": 1.6917215102182195, "grad_norm": 0.8384972810745239, "learning_rate": 8.313453536754508e-06, "loss": 0.0982, "step": 2442 }, { "epoch": 1.6924142708694148, "grad_norm": 0.8221649527549744, "learning_rate": 8.312760055478503e-06, "loss": 0.0985, "step": 2443 }, { "epoch": 1.6931070315206096, "grad_norm": 0.832737386226654, "learning_rate": 8.312066574202498e-06, "loss": 0.0946, "step": 2444 }, { "epoch": 1.6937997921718047, "grad_norm": 0.8320939540863037, "learning_rate": 8.311373092926491e-06, "loss": 0.073, "step": 2445 }, { "epoch": 1.6944925528229997, "grad_norm": 0.7923811078071594, "learning_rate": 8.310679611650486e-06, "loss": 0.0871, "step": 2446 }, { "epoch": 1.6951853134741945, "grad_norm": 0.8732138276100159, "learning_rate": 8.309986130374481e-06, "loss": 0.1052, "step": 2447 }, { "epoch": 1.6958780741253898, "grad_norm": 0.8823708891868591, "learning_rate": 8.309292649098474e-06, "loss": 0.0871, "step": 2448 }, { "epoch": 1.6965708347765847, "grad_norm": 0.8793323636054993, "learning_rate": 8.30859916782247e-06, "loss": 0.0879, "step": 2449 }, { "epoch": 1.6972635954277797, "grad_norm": 0.9887531399726868, "learning_rate": 8.307905686546464e-06, "loss": 0.1195, "step": 2450 }, { "epoch": 1.6979563560789748, "grad_norm": 0.7778171896934509, "learning_rate": 8.30721220527046e-06, "loss": 0.0912, "step": 2451 }, { "epoch": 1.6986491167301696, "grad_norm": 0.937286913394928, "learning_rate": 8.306518723994453e-06, "loss": 0.0915, "step": 2452 }, { "epoch": 1.6993418773813649, "grad_norm": 0.8203182220458984, "learning_rate": 8.305825242718447e-06, "loss": 0.0967, "step": 2453 }, { "epoch": 1.7000346380325597, "grad_norm": 0.8339923620223999, "learning_rate": 8.305131761442442e-06, "loss": 0.0997, "step": 2454 }, { "epoch": 1.7007273986837548, "grad_norm": 0.8548272252082825, "learning_rate": 8.304438280166436e-06, "loss": 0.0978, "step": 2455 }, { "epoch": 1.7014201593349498, "grad_norm": 0.907862663269043, "learning_rate": 8.30374479889043e-06, "loss": 0.0938, "step": 2456 }, { "epoch": 1.7021129199861447, "grad_norm": 0.819660484790802, "learning_rate": 8.303051317614425e-06, "loss": 0.0763, "step": 2457 }, { "epoch": 1.70280568063734, "grad_norm": 0.7305322885513306, "learning_rate": 8.302357836338419e-06, "loss": 0.0891, "step": 2458 }, { "epoch": 1.7034984412885348, "grad_norm": 0.8794694542884827, "learning_rate": 8.301664355062414e-06, "loss": 0.1, "step": 2459 }, { "epoch": 1.7041912019397298, "grad_norm": 0.9221211075782776, "learning_rate": 8.300970873786409e-06, "loss": 0.1073, "step": 2460 }, { "epoch": 1.704883962590925, "grad_norm": 0.8608057498931885, "learning_rate": 8.300277392510404e-06, "loss": 0.0943, "step": 2461 }, { "epoch": 1.7055767232421197, "grad_norm": 0.767064094543457, "learning_rate": 8.299583911234398e-06, "loss": 0.083, "step": 2462 }, { "epoch": 1.706269483893315, "grad_norm": 0.8210756778717041, "learning_rate": 8.298890429958392e-06, "loss": 0.0907, "step": 2463 }, { "epoch": 1.7069622445445098, "grad_norm": 0.8764961361885071, "learning_rate": 8.298196948682387e-06, "loss": 0.0933, "step": 2464 }, { "epoch": 1.707655005195705, "grad_norm": 0.8870079517364502, "learning_rate": 8.29750346740638e-06, "loss": 0.1225, "step": 2465 }, { "epoch": 1.7083477658469, "grad_norm": 0.8085984587669373, "learning_rate": 8.296809986130375e-06, "loss": 0.093, "step": 2466 }, { "epoch": 1.7090405264980948, "grad_norm": 0.7712332606315613, "learning_rate": 8.29611650485437e-06, "loss": 0.0869, "step": 2467 }, { "epoch": 1.70973328714929, "grad_norm": 0.9115517139434814, "learning_rate": 8.295423023578365e-06, "loss": 0.102, "step": 2468 }, { "epoch": 1.710426047800485, "grad_norm": 0.8816676735877991, "learning_rate": 8.29472954230236e-06, "loss": 0.0975, "step": 2469 }, { "epoch": 1.71111880845168, "grad_norm": 0.821628212928772, "learning_rate": 8.294036061026353e-06, "loss": 0.1004, "step": 2470 }, { "epoch": 1.711811569102875, "grad_norm": 0.8490061163902283, "learning_rate": 8.293342579750348e-06, "loss": 0.0797, "step": 2471 }, { "epoch": 1.7125043297540699, "grad_norm": 0.8417266607284546, "learning_rate": 8.292649098474343e-06, "loss": 0.0939, "step": 2472 }, { "epoch": 1.7131970904052651, "grad_norm": 0.7965371608734131, "learning_rate": 8.291955617198336e-06, "loss": 0.0999, "step": 2473 }, { "epoch": 1.71388985105646, "grad_norm": 0.886691689491272, "learning_rate": 8.291262135922331e-06, "loss": 0.1093, "step": 2474 }, { "epoch": 1.714582611707655, "grad_norm": 0.8134506940841675, "learning_rate": 8.290568654646324e-06, "loss": 0.0912, "step": 2475 }, { "epoch": 1.71527537235885, "grad_norm": 0.8399380445480347, "learning_rate": 8.289875173370319e-06, "loss": 0.0968, "step": 2476 }, { "epoch": 1.715968133010045, "grad_norm": 1.010575771331787, "learning_rate": 8.289181692094314e-06, "loss": 0.1254, "step": 2477 }, { "epoch": 1.7166608936612402, "grad_norm": 0.8292006254196167, "learning_rate": 8.288488210818309e-06, "loss": 0.0889, "step": 2478 }, { "epoch": 1.717353654312435, "grad_norm": 0.8920153379440308, "learning_rate": 8.287794729542304e-06, "loss": 0.0835, "step": 2479 }, { "epoch": 1.71804641496363, "grad_norm": 0.7608556151390076, "learning_rate": 8.287101248266297e-06, "loss": 0.08, "step": 2480 }, { "epoch": 1.7187391756148251, "grad_norm": 0.7679067254066467, "learning_rate": 8.286407766990292e-06, "loss": 0.0807, "step": 2481 }, { "epoch": 1.71943193626602, "grad_norm": 0.9358129501342773, "learning_rate": 8.285714285714287e-06, "loss": 0.101, "step": 2482 }, { "epoch": 1.7201246969172153, "grad_norm": 0.8521108627319336, "learning_rate": 8.28502080443828e-06, "loss": 0.0766, "step": 2483 }, { "epoch": 1.72081745756841, "grad_norm": 0.7279655933380127, "learning_rate": 8.284327323162275e-06, "loss": 0.0747, "step": 2484 }, { "epoch": 1.7215102182196051, "grad_norm": 0.8134982585906982, "learning_rate": 8.28363384188627e-06, "loss": 0.0778, "step": 2485 }, { "epoch": 1.7222029788708002, "grad_norm": 0.8102089166641235, "learning_rate": 8.282940360610265e-06, "loss": 0.0872, "step": 2486 }, { "epoch": 1.722895739521995, "grad_norm": 0.9148496985435486, "learning_rate": 8.28224687933426e-06, "loss": 0.0999, "step": 2487 }, { "epoch": 1.7235885001731903, "grad_norm": 0.9564724564552307, "learning_rate": 8.281553398058253e-06, "loss": 0.0738, "step": 2488 }, { "epoch": 1.7242812608243852, "grad_norm": 0.8015518188476562, "learning_rate": 8.280859916782248e-06, "loss": 0.1031, "step": 2489 }, { "epoch": 1.7249740214755802, "grad_norm": 0.7892102599143982, "learning_rate": 8.280166435506241e-06, "loss": 0.074, "step": 2490 }, { "epoch": 1.7256667821267753, "grad_norm": 0.811720609664917, "learning_rate": 8.279472954230236e-06, "loss": 0.0913, "step": 2491 }, { "epoch": 1.72635954277797, "grad_norm": 0.8779986500740051, "learning_rate": 8.278779472954231e-06, "loss": 0.1088, "step": 2492 }, { "epoch": 1.7270523034291654, "grad_norm": 0.7302936911582947, "learning_rate": 8.278085991678224e-06, "loss": 0.0724, "step": 2493 }, { "epoch": 1.7277450640803602, "grad_norm": 0.9817513227462769, "learning_rate": 8.27739251040222e-06, "loss": 0.0948, "step": 2494 }, { "epoch": 1.7284378247315553, "grad_norm": 0.8882198929786682, "learning_rate": 8.276699029126214e-06, "loss": 0.1112, "step": 2495 }, { "epoch": 1.7291305853827503, "grad_norm": 0.911967933177948, "learning_rate": 8.27600554785021e-06, "loss": 0.1198, "step": 2496 }, { "epoch": 1.7298233460339452, "grad_norm": 0.7240428328514099, "learning_rate": 8.275312066574204e-06, "loss": 0.0806, "step": 2497 }, { "epoch": 1.7305161066851404, "grad_norm": 0.8238398432731628, "learning_rate": 8.274618585298197e-06, "loss": 0.0913, "step": 2498 }, { "epoch": 1.7312088673363353, "grad_norm": 0.784412145614624, "learning_rate": 8.273925104022192e-06, "loss": 0.0912, "step": 2499 }, { "epoch": 1.7319016279875303, "grad_norm": 0.8094228506088257, "learning_rate": 8.273231622746186e-06, "loss": 0.0973, "step": 2500 }, { "epoch": 1.7325943886387254, "grad_norm": 0.8541341423988342, "learning_rate": 8.27253814147018e-06, "loss": 0.0976, "step": 2501 }, { "epoch": 1.7332871492899202, "grad_norm": 0.7942793965339661, "learning_rate": 8.271844660194175e-06, "loss": 0.078, "step": 2502 }, { "epoch": 1.7339799099411155, "grad_norm": 0.787823498249054, "learning_rate": 8.27115117891817e-06, "loss": 0.0909, "step": 2503 }, { "epoch": 1.7346726705923103, "grad_norm": 0.8610603213310242, "learning_rate": 8.270457697642165e-06, "loss": 0.0981, "step": 2504 }, { "epoch": 1.7353654312435054, "grad_norm": 0.9186752438545227, "learning_rate": 8.269764216366159e-06, "loss": 0.0874, "step": 2505 }, { "epoch": 1.7360581918947005, "grad_norm": 0.7902175188064575, "learning_rate": 8.269070735090154e-06, "loss": 0.0847, "step": 2506 }, { "epoch": 1.7367509525458953, "grad_norm": 0.8692470788955688, "learning_rate": 8.268377253814148e-06, "loss": 0.095, "step": 2507 }, { "epoch": 1.7374437131970906, "grad_norm": 0.8168033361434937, "learning_rate": 8.267683772538142e-06, "loss": 0.1165, "step": 2508 }, { "epoch": 1.7381364738482854, "grad_norm": 0.8191872835159302, "learning_rate": 8.266990291262137e-06, "loss": 0.0948, "step": 2509 }, { "epoch": 1.7388292344994805, "grad_norm": 0.9185828566551208, "learning_rate": 8.266296809986132e-06, "loss": 0.0879, "step": 2510 }, { "epoch": 1.7395219951506755, "grad_norm": 0.8484848737716675, "learning_rate": 8.265603328710125e-06, "loss": 0.116, "step": 2511 }, { "epoch": 1.7402147558018703, "grad_norm": 0.8429151773452759, "learning_rate": 8.26490984743412e-06, "loss": 0.0897, "step": 2512 }, { "epoch": 1.7409075164530656, "grad_norm": 0.8342225551605225, "learning_rate": 8.264216366158115e-06, "loss": 0.1034, "step": 2513 }, { "epoch": 1.7416002771042605, "grad_norm": 0.8275694251060486, "learning_rate": 8.26352288488211e-06, "loss": 0.0925, "step": 2514 }, { "epoch": 1.7422930377554555, "grad_norm": 0.8445605039596558, "learning_rate": 8.262829403606103e-06, "loss": 0.0778, "step": 2515 }, { "epoch": 1.7429857984066506, "grad_norm": 0.8089935183525085, "learning_rate": 8.262135922330098e-06, "loss": 0.0901, "step": 2516 }, { "epoch": 1.7436785590578454, "grad_norm": 0.8095570206642151, "learning_rate": 8.261442441054093e-06, "loss": 0.0897, "step": 2517 }, { "epoch": 1.7443713197090407, "grad_norm": 0.7870088815689087, "learning_rate": 8.260748959778086e-06, "loss": 0.0717, "step": 2518 }, { "epoch": 1.7450640803602355, "grad_norm": 0.8679880499839783, "learning_rate": 8.260055478502081e-06, "loss": 0.1004, "step": 2519 }, { "epoch": 1.7457568410114306, "grad_norm": 0.7995391488075256, "learning_rate": 8.259361997226076e-06, "loss": 0.0896, "step": 2520 }, { "epoch": 1.7464496016626256, "grad_norm": 0.6763067841529846, "learning_rate": 8.25866851595007e-06, "loss": 0.0682, "step": 2521 }, { "epoch": 1.7471423623138205, "grad_norm": 0.8070166707038879, "learning_rate": 8.257975034674066e-06, "loss": 0.0926, "step": 2522 }, { "epoch": 1.7478351229650158, "grad_norm": 1.0157724618911743, "learning_rate": 8.257281553398059e-06, "loss": 0.0986, "step": 2523 }, { "epoch": 1.7485278836162106, "grad_norm": 0.844245970249176, "learning_rate": 8.256588072122054e-06, "loss": 0.0841, "step": 2524 }, { "epoch": 1.7492206442674056, "grad_norm": 0.9986148476600647, "learning_rate": 8.255894590846047e-06, "loss": 0.1219, "step": 2525 }, { "epoch": 1.7499134049186007, "grad_norm": 0.9110881090164185, "learning_rate": 8.255201109570042e-06, "loss": 0.095, "step": 2526 }, { "epoch": 1.7506061655697955, "grad_norm": 0.8442522883415222, "learning_rate": 8.254507628294037e-06, "loss": 0.0901, "step": 2527 }, { "epoch": 1.7512989262209906, "grad_norm": 0.7971848845481873, "learning_rate": 8.253814147018032e-06, "loss": 0.0819, "step": 2528 }, { "epoch": 1.7519916868721856, "grad_norm": 0.7700503468513489, "learning_rate": 8.253120665742025e-06, "loss": 0.0788, "step": 2529 }, { "epoch": 1.7526844475233807, "grad_norm": 0.8246543407440186, "learning_rate": 8.25242718446602e-06, "loss": 0.0915, "step": 2530 }, { "epoch": 1.7533772081745758, "grad_norm": 0.96554034948349, "learning_rate": 8.251733703190015e-06, "loss": 0.1027, "step": 2531 }, { "epoch": 1.7540699688257706, "grad_norm": 0.882771372795105, "learning_rate": 8.25104022191401e-06, "loss": 0.1009, "step": 2532 }, { "epoch": 1.7547627294769657, "grad_norm": 0.8232872486114502, "learning_rate": 8.250346740638003e-06, "loss": 0.1193, "step": 2533 }, { "epoch": 1.7554554901281607, "grad_norm": 0.7570979595184326, "learning_rate": 8.249653259361998e-06, "loss": 0.086, "step": 2534 }, { "epoch": 1.7561482507793558, "grad_norm": 0.8680220246315002, "learning_rate": 8.248959778085991e-06, "loss": 0.0922, "step": 2535 }, { "epoch": 1.7568410114305508, "grad_norm": 0.7393411993980408, "learning_rate": 8.248266296809986e-06, "loss": 0.0701, "step": 2536 }, { "epoch": 1.7575337720817457, "grad_norm": 0.8233626484870911, "learning_rate": 8.247572815533981e-06, "loss": 0.0867, "step": 2537 }, { "epoch": 1.7582265327329407, "grad_norm": 0.927104115486145, "learning_rate": 8.246879334257976e-06, "loss": 0.1022, "step": 2538 }, { "epoch": 1.7589192933841358, "grad_norm": 0.7783875465393066, "learning_rate": 8.246185852981971e-06, "loss": 0.0857, "step": 2539 }, { "epoch": 1.7596120540353308, "grad_norm": 0.9003371596336365, "learning_rate": 8.245492371705964e-06, "loss": 0.1037, "step": 2540 }, { "epoch": 1.7603048146865259, "grad_norm": 0.8483417630195618, "learning_rate": 8.24479889042996e-06, "loss": 0.0833, "step": 2541 }, { "epoch": 1.7609975753377207, "grad_norm": 0.806175172328949, "learning_rate": 8.244105409153952e-06, "loss": 0.0896, "step": 2542 }, { "epoch": 1.7616903359889158, "grad_norm": 0.8583565950393677, "learning_rate": 8.243411927877947e-06, "loss": 0.093, "step": 2543 }, { "epoch": 1.7623830966401108, "grad_norm": 0.8102852702140808, "learning_rate": 8.242718446601942e-06, "loss": 0.0804, "step": 2544 }, { "epoch": 1.763075857291306, "grad_norm": 0.9145060777664185, "learning_rate": 8.242024965325937e-06, "loss": 0.1106, "step": 2545 }, { "epoch": 1.763768617942501, "grad_norm": 0.9196067452430725, "learning_rate": 8.241331484049932e-06, "loss": 0.0947, "step": 2546 }, { "epoch": 1.7644613785936958, "grad_norm": 0.9018850922584534, "learning_rate": 8.240638002773925e-06, "loss": 0.1057, "step": 2547 }, { "epoch": 1.7651541392448908, "grad_norm": 0.8649345636367798, "learning_rate": 8.23994452149792e-06, "loss": 0.0845, "step": 2548 }, { "epoch": 1.765846899896086, "grad_norm": 0.7476176619529724, "learning_rate": 8.239251040221915e-06, "loss": 0.0898, "step": 2549 }, { "epoch": 1.766539660547281, "grad_norm": 0.8914130330085754, "learning_rate": 8.238557558945909e-06, "loss": 0.1024, "step": 2550 }, { "epoch": 1.767232421198476, "grad_norm": 0.8068163990974426, "learning_rate": 8.237864077669903e-06, "loss": 0.0938, "step": 2551 }, { "epoch": 1.7679251818496708, "grad_norm": 0.8041585087776184, "learning_rate": 8.237170596393897e-06, "loss": 0.0971, "step": 2552 }, { "epoch": 1.768617942500866, "grad_norm": 2.1894426345825195, "learning_rate": 8.236477115117892e-06, "loss": 0.0931, "step": 2553 }, { "epoch": 1.769310703152061, "grad_norm": 0.8765089511871338, "learning_rate": 8.235783633841887e-06, "loss": 0.079, "step": 2554 }, { "epoch": 1.770003463803256, "grad_norm": 0.8140201568603516, "learning_rate": 8.235090152565882e-06, "loss": 0.0785, "step": 2555 }, { "epoch": 1.770696224454451, "grad_norm": 0.7975987792015076, "learning_rate": 8.234396671289876e-06, "loss": 0.0819, "step": 2556 }, { "epoch": 1.771388985105646, "grad_norm": 0.9499161839485168, "learning_rate": 8.23370319001387e-06, "loss": 0.0903, "step": 2557 }, { "epoch": 1.772081745756841, "grad_norm": 0.8398935198783875, "learning_rate": 8.233009708737865e-06, "loss": 0.1053, "step": 2558 }, { "epoch": 1.772774506408036, "grad_norm": 0.8754689693450928, "learning_rate": 8.23231622746186e-06, "loss": 0.0989, "step": 2559 }, { "epoch": 1.773467267059231, "grad_norm": 0.8374683856964111, "learning_rate": 8.231622746185853e-06, "loss": 0.0741, "step": 2560 }, { "epoch": 1.7741600277104261, "grad_norm": 0.9576423764228821, "learning_rate": 8.230929264909848e-06, "loss": 0.1182, "step": 2561 }, { "epoch": 1.774852788361621, "grad_norm": 0.9090636372566223, "learning_rate": 8.230235783633843e-06, "loss": 0.1065, "step": 2562 }, { "epoch": 1.775545549012816, "grad_norm": 0.8778228759765625, "learning_rate": 8.229542302357838e-06, "loss": 0.0844, "step": 2563 }, { "epoch": 1.776238309664011, "grad_norm": 0.8534209728240967, "learning_rate": 8.228848821081833e-06, "loss": 0.0941, "step": 2564 }, { "epoch": 1.7769310703152061, "grad_norm": 0.8219755291938782, "learning_rate": 8.228155339805826e-06, "loss": 0.0951, "step": 2565 }, { "epoch": 1.7776238309664012, "grad_norm": 0.8871533274650574, "learning_rate": 8.22746185852982e-06, "loss": 0.1154, "step": 2566 }, { "epoch": 1.778316591617596, "grad_norm": 0.9020233750343323, "learning_rate": 8.226768377253814e-06, "loss": 0.0926, "step": 2567 }, { "epoch": 1.779009352268791, "grad_norm": 0.9474078416824341, "learning_rate": 8.226074895977809e-06, "loss": 0.1006, "step": 2568 }, { "epoch": 1.7797021129199861, "grad_norm": 0.8097711205482483, "learning_rate": 8.225381414701804e-06, "loss": 0.1019, "step": 2569 }, { "epoch": 1.7803948735711812, "grad_norm": 0.9169875979423523, "learning_rate": 8.224687933425797e-06, "loss": 0.0968, "step": 2570 }, { "epoch": 1.7810876342223763, "grad_norm": 0.7902358174324036, "learning_rate": 8.223994452149792e-06, "loss": 0.0715, "step": 2571 }, { "epoch": 1.781780394873571, "grad_norm": 0.842688262462616, "learning_rate": 8.223300970873787e-06, "loss": 0.0991, "step": 2572 }, { "epoch": 1.7824731555247662, "grad_norm": 1.0050466060638428, "learning_rate": 8.222607489597782e-06, "loss": 0.1143, "step": 2573 }, { "epoch": 1.7831659161759612, "grad_norm": 0.8606399297714233, "learning_rate": 8.221914008321777e-06, "loss": 0.0982, "step": 2574 }, { "epoch": 1.7838586768271563, "grad_norm": 0.7917369604110718, "learning_rate": 8.22122052704577e-06, "loss": 0.1021, "step": 2575 }, { "epoch": 1.7845514374783513, "grad_norm": 0.8177589178085327, "learning_rate": 8.220527045769765e-06, "loss": 0.1045, "step": 2576 }, { "epoch": 1.7852441981295462, "grad_norm": 0.7824551463127136, "learning_rate": 8.219833564493758e-06, "loss": 0.0909, "step": 2577 }, { "epoch": 1.7859369587807412, "grad_norm": 0.776489794254303, "learning_rate": 8.219140083217753e-06, "loss": 0.0915, "step": 2578 }, { "epoch": 1.7866297194319363, "grad_norm": 0.9233396649360657, "learning_rate": 8.218446601941748e-06, "loss": 0.0987, "step": 2579 }, { "epoch": 1.7873224800831313, "grad_norm": 0.7486032247543335, "learning_rate": 8.217753120665743e-06, "loss": 0.0673, "step": 2580 }, { "epoch": 1.7880152407343264, "grad_norm": 0.9192646741867065, "learning_rate": 8.217059639389738e-06, "loss": 0.1029, "step": 2581 }, { "epoch": 1.7887080013855212, "grad_norm": 0.9108365774154663, "learning_rate": 8.216366158113731e-06, "loss": 0.0936, "step": 2582 }, { "epoch": 1.7894007620367163, "grad_norm": 0.8713735342025757, "learning_rate": 8.215672676837726e-06, "loss": 0.0923, "step": 2583 }, { "epoch": 1.7900935226879113, "grad_norm": 0.8167701363563538, "learning_rate": 8.214979195561721e-06, "loss": 0.0987, "step": 2584 }, { "epoch": 1.7907862833391064, "grad_norm": 0.8005883693695068, "learning_rate": 8.214285714285714e-06, "loss": 0.0821, "step": 2585 }, { "epoch": 1.7914790439903014, "grad_norm": 0.8728950023651123, "learning_rate": 8.21359223300971e-06, "loss": 0.0888, "step": 2586 }, { "epoch": 1.7921718046414963, "grad_norm": 0.9217014908790588, "learning_rate": 8.212898751733704e-06, "loss": 0.092, "step": 2587 }, { "epoch": 1.7928645652926913, "grad_norm": 0.8786712288856506, "learning_rate": 8.212205270457697e-06, "loss": 0.0922, "step": 2588 }, { "epoch": 1.7935573259438864, "grad_norm": 0.8297293782234192, "learning_rate": 8.211511789181692e-06, "loss": 0.087, "step": 2589 }, { "epoch": 1.7942500865950815, "grad_norm": 0.8937293291091919, "learning_rate": 8.210818307905687e-06, "loss": 0.1082, "step": 2590 }, { "epoch": 1.7949428472462765, "grad_norm": 0.9118964672088623, "learning_rate": 8.210124826629682e-06, "loss": 0.1229, "step": 2591 }, { "epoch": 1.7956356078974713, "grad_norm": 0.8846879005432129, "learning_rate": 8.209431345353675e-06, "loss": 0.1107, "step": 2592 }, { "epoch": 1.7963283685486664, "grad_norm": 0.9043500423431396, "learning_rate": 8.20873786407767e-06, "loss": 0.1271, "step": 2593 }, { "epoch": 1.7970211291998615, "grad_norm": 0.9226686954498291, "learning_rate": 8.208044382801665e-06, "loss": 0.1101, "step": 2594 }, { "epoch": 1.7977138898510565, "grad_norm": 0.9043811559677124, "learning_rate": 8.207350901525659e-06, "loss": 0.0766, "step": 2595 }, { "epoch": 1.7984066505022516, "grad_norm": 0.8195856213569641, "learning_rate": 8.206657420249653e-06, "loss": 0.0896, "step": 2596 }, { "epoch": 1.7990994111534464, "grad_norm": 0.8749009370803833, "learning_rate": 8.205963938973648e-06, "loss": 0.1003, "step": 2597 }, { "epoch": 1.7997921718046415, "grad_norm": 0.9396862387657166, "learning_rate": 8.205270457697643e-06, "loss": 0.1147, "step": 2598 }, { "epoch": 1.8004849324558365, "grad_norm": 0.8920246362686157, "learning_rate": 8.204576976421638e-06, "loss": 0.1166, "step": 2599 }, { "epoch": 1.8011776931070316, "grad_norm": 0.9035167694091797, "learning_rate": 8.203883495145632e-06, "loss": 0.08, "step": 2600 }, { "epoch": 1.8018704537582266, "grad_norm": 0.7884556651115417, "learning_rate": 8.203190013869626e-06, "loss": 0.0887, "step": 2601 }, { "epoch": 1.8025632144094215, "grad_norm": 0.9172216653823853, "learning_rate": 8.20249653259362e-06, "loss": 0.0902, "step": 2602 }, { "epoch": 1.8032559750606165, "grad_norm": 0.7486192584037781, "learning_rate": 8.201803051317615e-06, "loss": 0.0867, "step": 2603 }, { "epoch": 1.8039487357118116, "grad_norm": 0.9297676086425781, "learning_rate": 8.20110957004161e-06, "loss": 0.0926, "step": 2604 }, { "epoch": 1.8046414963630066, "grad_norm": 0.9983718395233154, "learning_rate": 8.200416088765604e-06, "loss": 0.0912, "step": 2605 }, { "epoch": 1.8053342570142017, "grad_norm": 0.7971341609954834, "learning_rate": 8.1997226074896e-06, "loss": 0.0905, "step": 2606 }, { "epoch": 1.8060270176653965, "grad_norm": 0.7908374071121216, "learning_rate": 8.199029126213593e-06, "loss": 0.0728, "step": 2607 }, { "epoch": 1.8067197783165916, "grad_norm": 0.8686855435371399, "learning_rate": 8.198335644937588e-06, "loss": 0.0931, "step": 2608 }, { "epoch": 1.8074125389677866, "grad_norm": 1.0262415409088135, "learning_rate": 8.197642163661583e-06, "loss": 0.107, "step": 2609 }, { "epoch": 1.8081052996189815, "grad_norm": 0.8252395987510681, "learning_rate": 8.196948682385576e-06, "loss": 0.0926, "step": 2610 }, { "epoch": 1.8087980602701768, "grad_norm": 0.8116162419319153, "learning_rate": 8.19625520110957e-06, "loss": 0.0894, "step": 2611 }, { "epoch": 1.8094908209213716, "grad_norm": 0.8530290722846985, "learning_rate": 8.195561719833564e-06, "loss": 0.096, "step": 2612 }, { "epoch": 1.8101835815725666, "grad_norm": 0.8475037813186646, "learning_rate": 8.194868238557559e-06, "loss": 0.0859, "step": 2613 }, { "epoch": 1.8108763422237617, "grad_norm": 0.8325061202049255, "learning_rate": 8.194174757281554e-06, "loss": 0.0987, "step": 2614 }, { "epoch": 1.8115691028749565, "grad_norm": 0.9618691205978394, "learning_rate": 8.193481276005549e-06, "loss": 0.126, "step": 2615 }, { "epoch": 1.8122618635261518, "grad_norm": 0.838097333908081, "learning_rate": 8.192787794729544e-06, "loss": 0.0698, "step": 2616 }, { "epoch": 1.8129546241773467, "grad_norm": 0.8407849669456482, "learning_rate": 8.192094313453537e-06, "loss": 0.0812, "step": 2617 }, { "epoch": 1.8136473848285417, "grad_norm": 0.8925338387489319, "learning_rate": 8.191400832177532e-06, "loss": 0.0942, "step": 2618 }, { "epoch": 1.8143401454797368, "grad_norm": 0.8784374594688416, "learning_rate": 8.190707350901527e-06, "loss": 0.0964, "step": 2619 }, { "epoch": 1.8150329061309316, "grad_norm": 0.9722921848297119, "learning_rate": 8.19001386962552e-06, "loss": 0.0971, "step": 2620 }, { "epoch": 1.8157256667821269, "grad_norm": 0.9016208648681641, "learning_rate": 8.189320388349515e-06, "loss": 0.1139, "step": 2621 }, { "epoch": 1.8164184274333217, "grad_norm": 0.8219439387321472, "learning_rate": 8.18862690707351e-06, "loss": 0.0819, "step": 2622 }, { "epoch": 1.8171111880845168, "grad_norm": 0.903080940246582, "learning_rate": 8.187933425797505e-06, "loss": 0.0943, "step": 2623 }, { "epoch": 1.8178039487357118, "grad_norm": 0.804472029209137, "learning_rate": 8.1872399445215e-06, "loss": 0.0887, "step": 2624 }, { "epoch": 1.8184967093869067, "grad_norm": 0.8361225128173828, "learning_rate": 8.186546463245493e-06, "loss": 0.0814, "step": 2625 }, { "epoch": 1.819189470038102, "grad_norm": 0.9732152819633484, "learning_rate": 8.185852981969488e-06, "loss": 0.0811, "step": 2626 }, { "epoch": 1.8198822306892968, "grad_norm": 0.8066356182098389, "learning_rate": 8.185159500693481e-06, "loss": 0.0863, "step": 2627 }, { "epoch": 1.8205749913404918, "grad_norm": 0.8024638891220093, "learning_rate": 8.184466019417476e-06, "loss": 0.099, "step": 2628 }, { "epoch": 1.821267751991687, "grad_norm": 1.0059717893600464, "learning_rate": 8.183772538141471e-06, "loss": 0.1005, "step": 2629 }, { "epoch": 1.8219605126428817, "grad_norm": 0.8454689383506775, "learning_rate": 8.183079056865464e-06, "loss": 0.0842, "step": 2630 }, { "epoch": 1.822653273294077, "grad_norm": 1.0026649236679077, "learning_rate": 8.18238557558946e-06, "loss": 0.095, "step": 2631 }, { "epoch": 1.8233460339452718, "grad_norm": 0.8365281820297241, "learning_rate": 8.181692094313454e-06, "loss": 0.0915, "step": 2632 }, { "epoch": 1.824038794596467, "grad_norm": 0.9412815570831299, "learning_rate": 8.180998613037449e-06, "loss": 0.0919, "step": 2633 }, { "epoch": 1.824731555247662, "grad_norm": 0.7574265003204346, "learning_rate": 8.180305131761444e-06, "loss": 0.0759, "step": 2634 }, { "epoch": 1.8254243158988568, "grad_norm": 0.9514365792274475, "learning_rate": 8.179611650485437e-06, "loss": 0.104, "step": 2635 }, { "epoch": 1.826117076550052, "grad_norm": 0.9478408694267273, "learning_rate": 8.178918169209432e-06, "loss": 0.1164, "step": 2636 }, { "epoch": 1.826809837201247, "grad_norm": 0.8486576676368713, "learning_rate": 8.178224687933425e-06, "loss": 0.0999, "step": 2637 }, { "epoch": 1.827502597852442, "grad_norm": 0.8942376375198364, "learning_rate": 8.17753120665742e-06, "loss": 0.0972, "step": 2638 }, { "epoch": 1.828195358503637, "grad_norm": 0.9121793508529663, "learning_rate": 8.176837725381415e-06, "loss": 0.12, "step": 2639 }, { "epoch": 1.8288881191548318, "grad_norm": 0.8649296760559082, "learning_rate": 8.17614424410541e-06, "loss": 0.0963, "step": 2640 }, { "epoch": 1.8295808798060271, "grad_norm": 0.9005740880966187, "learning_rate": 8.175450762829405e-06, "loss": 0.1168, "step": 2641 }, { "epoch": 1.830273640457222, "grad_norm": 1.1905404329299927, "learning_rate": 8.174757281553398e-06, "loss": 0.111, "step": 2642 }, { "epoch": 1.830966401108417, "grad_norm": 0.8033245801925659, "learning_rate": 8.174063800277393e-06, "loss": 0.0846, "step": 2643 }, { "epoch": 1.831659161759612, "grad_norm": 0.8120441436767578, "learning_rate": 8.173370319001388e-06, "loss": 0.0788, "step": 2644 }, { "epoch": 1.832351922410807, "grad_norm": 0.9285821318626404, "learning_rate": 8.172676837725381e-06, "loss": 0.0834, "step": 2645 }, { "epoch": 1.8330446830620022, "grad_norm": 0.9043736457824707, "learning_rate": 8.171983356449376e-06, "loss": 0.1068, "step": 2646 }, { "epoch": 1.833737443713197, "grad_norm": 0.9138674139976501, "learning_rate": 8.17128987517337e-06, "loss": 0.1053, "step": 2647 }, { "epoch": 1.834430204364392, "grad_norm": 0.8692159056663513, "learning_rate": 8.170596393897365e-06, "loss": 0.1023, "step": 2648 }, { "epoch": 1.8351229650155871, "grad_norm": 0.9652042388916016, "learning_rate": 8.16990291262136e-06, "loss": 0.0959, "step": 2649 }, { "epoch": 1.835815725666782, "grad_norm": 0.8220810890197754, "learning_rate": 8.169209431345354e-06, "loss": 0.0821, "step": 2650 }, { "epoch": 1.8365084863179773, "grad_norm": 0.9395898580551147, "learning_rate": 8.16851595006935e-06, "loss": 0.1217, "step": 2651 }, { "epoch": 1.837201246969172, "grad_norm": 0.7368136048316956, "learning_rate": 8.167822468793343e-06, "loss": 0.0816, "step": 2652 }, { "epoch": 1.8378940076203671, "grad_norm": 0.8244364857673645, "learning_rate": 8.167128987517338e-06, "loss": 0.0854, "step": 2653 }, { "epoch": 1.8385867682715622, "grad_norm": 0.931873083114624, "learning_rate": 8.166435506241333e-06, "loss": 0.12, "step": 2654 }, { "epoch": 1.839279528922757, "grad_norm": 0.7082671523094177, "learning_rate": 8.165742024965326e-06, "loss": 0.0607, "step": 2655 }, { "epoch": 1.8399722895739523, "grad_norm": 0.7946502566337585, "learning_rate": 8.16504854368932e-06, "loss": 0.0846, "step": 2656 }, { "epoch": 1.8406650502251471, "grad_norm": 0.8827128410339355, "learning_rate": 8.164355062413316e-06, "loss": 0.1049, "step": 2657 }, { "epoch": 1.8413578108763422, "grad_norm": 0.7891724705696106, "learning_rate": 8.16366158113731e-06, "loss": 0.0894, "step": 2658 }, { "epoch": 1.8420505715275373, "grad_norm": 0.7655206322669983, "learning_rate": 8.162968099861305e-06, "loss": 0.0945, "step": 2659 }, { "epoch": 1.842743332178732, "grad_norm": 0.7659879326820374, "learning_rate": 8.162274618585299e-06, "loss": 0.0864, "step": 2660 }, { "epoch": 1.8434360928299274, "grad_norm": 0.9626227617263794, "learning_rate": 8.161581137309294e-06, "loss": 0.1046, "step": 2661 }, { "epoch": 1.8441288534811222, "grad_norm": 0.8859971761703491, "learning_rate": 8.160887656033287e-06, "loss": 0.1298, "step": 2662 }, { "epoch": 1.8448216141323173, "grad_norm": 0.7885403037071228, "learning_rate": 8.160194174757282e-06, "loss": 0.0879, "step": 2663 }, { "epoch": 1.8455143747835123, "grad_norm": 0.9495970606803894, "learning_rate": 8.159500693481277e-06, "loss": 0.11, "step": 2664 }, { "epoch": 1.8462071354347072, "grad_norm": 0.7970353364944458, "learning_rate": 8.15880721220527e-06, "loss": 0.0833, "step": 2665 }, { "epoch": 1.8468998960859024, "grad_norm": 0.796225368976593, "learning_rate": 8.158113730929265e-06, "loss": 0.083, "step": 2666 }, { "epoch": 1.8475926567370973, "grad_norm": 0.9057906270027161, "learning_rate": 8.15742024965326e-06, "loss": 0.1051, "step": 2667 }, { "epoch": 1.8482854173882923, "grad_norm": 0.8341637849807739, "learning_rate": 8.156726768377255e-06, "loss": 0.1086, "step": 2668 }, { "epoch": 1.8489781780394874, "grad_norm": 0.7643277049064636, "learning_rate": 8.15603328710125e-06, "loss": 0.0747, "step": 2669 }, { "epoch": 1.8496709386906822, "grad_norm": 0.7797998189926147, "learning_rate": 8.155339805825243e-06, "loss": 0.0743, "step": 2670 }, { "epoch": 1.8503636993418775, "grad_norm": 0.896926999092102, "learning_rate": 8.154646324549238e-06, "loss": 0.097, "step": 2671 }, { "epoch": 1.8510564599930723, "grad_norm": 0.7922436594963074, "learning_rate": 8.153952843273231e-06, "loss": 0.0913, "step": 2672 }, { "epoch": 1.8517492206442674, "grad_norm": 0.8700946569442749, "learning_rate": 8.153259361997226e-06, "loss": 0.0848, "step": 2673 }, { "epoch": 1.8524419812954624, "grad_norm": 0.953936755657196, "learning_rate": 8.152565880721221e-06, "loss": 0.1209, "step": 2674 }, { "epoch": 1.8531347419466573, "grad_norm": 0.8067466020584106, "learning_rate": 8.151872399445216e-06, "loss": 0.1108, "step": 2675 }, { "epoch": 1.8538275025978526, "grad_norm": 0.8336835503578186, "learning_rate": 8.151178918169211e-06, "loss": 0.1026, "step": 2676 }, { "epoch": 1.8545202632490474, "grad_norm": 0.8462893962860107, "learning_rate": 8.150485436893204e-06, "loss": 0.0775, "step": 2677 }, { "epoch": 1.8552130239002425, "grad_norm": 0.8883715867996216, "learning_rate": 8.149791955617199e-06, "loss": 0.1208, "step": 2678 }, { "epoch": 1.8559057845514375, "grad_norm": 0.7880957722663879, "learning_rate": 8.149098474341194e-06, "loss": 0.0932, "step": 2679 }, { "epoch": 1.8565985452026323, "grad_norm": 0.6992374062538147, "learning_rate": 8.148404993065187e-06, "loss": 0.0695, "step": 2680 }, { "epoch": 1.8572913058538276, "grad_norm": 0.8892483115196228, "learning_rate": 8.147711511789182e-06, "loss": 0.1047, "step": 2681 }, { "epoch": 1.8579840665050225, "grad_norm": 0.7660226225852966, "learning_rate": 8.147018030513177e-06, "loss": 0.0902, "step": 2682 }, { "epoch": 1.8586768271562175, "grad_norm": 0.929363489151001, "learning_rate": 8.146324549237172e-06, "loss": 0.1113, "step": 2683 }, { "epoch": 1.8593695878074126, "grad_norm": 0.9377325177192688, "learning_rate": 8.145631067961165e-06, "loss": 0.0966, "step": 2684 }, { "epoch": 1.8600623484586074, "grad_norm": 0.8253400325775146, "learning_rate": 8.14493758668516e-06, "loss": 0.0892, "step": 2685 }, { "epoch": 1.8607551091098027, "grad_norm": 0.816107451915741, "learning_rate": 8.144244105409155e-06, "loss": 0.0805, "step": 2686 }, { "epoch": 1.8614478697609975, "grad_norm": 0.8043034672737122, "learning_rate": 8.143550624133148e-06, "loss": 0.0924, "step": 2687 }, { "epoch": 1.8621406304121926, "grad_norm": 0.8767082691192627, "learning_rate": 8.142857142857143e-06, "loss": 0.1195, "step": 2688 }, { "epoch": 1.8628333910633876, "grad_norm": 0.8499141931533813, "learning_rate": 8.142163661581138e-06, "loss": 0.1162, "step": 2689 }, { "epoch": 1.8635261517145825, "grad_norm": 0.8079686164855957, "learning_rate": 8.141470180305131e-06, "loss": 0.0958, "step": 2690 }, { "epoch": 1.8642189123657777, "grad_norm": 0.897999107837677, "learning_rate": 8.140776699029126e-06, "loss": 0.1013, "step": 2691 }, { "epoch": 1.8649116730169726, "grad_norm": 0.8873015642166138, "learning_rate": 8.140083217753121e-06, "loss": 0.0851, "step": 2692 }, { "epoch": 1.8656044336681676, "grad_norm": 0.8154669404029846, "learning_rate": 8.139389736477116e-06, "loss": 0.0878, "step": 2693 }, { "epoch": 1.8662971943193627, "grad_norm": 0.8138689994812012, "learning_rate": 8.138696255201111e-06, "loss": 0.0907, "step": 2694 }, { "epoch": 1.8669899549705575, "grad_norm": 0.9204639792442322, "learning_rate": 8.138002773925104e-06, "loss": 0.1067, "step": 2695 }, { "epoch": 1.8676827156217528, "grad_norm": 0.9914817214012146, "learning_rate": 8.1373092926491e-06, "loss": 0.0782, "step": 2696 }, { "epoch": 1.8683754762729476, "grad_norm": 0.89031982421875, "learning_rate": 8.136615811373093e-06, "loss": 0.111, "step": 2697 }, { "epoch": 1.8690682369241427, "grad_norm": 0.8497005105018616, "learning_rate": 8.135922330097088e-06, "loss": 0.0887, "step": 2698 }, { "epoch": 1.8697609975753378, "grad_norm": 0.8609863519668579, "learning_rate": 8.135228848821082e-06, "loss": 0.0899, "step": 2699 }, { "epoch": 1.8704537582265326, "grad_norm": 0.8168871402740479, "learning_rate": 8.134535367545077e-06, "loss": 0.0957, "step": 2700 }, { "epoch": 1.8711465188777279, "grad_norm": 0.878494143486023, "learning_rate": 8.133841886269072e-06, "loss": 0.0834, "step": 2701 }, { "epoch": 1.8718392795289227, "grad_norm": 0.9112176299095154, "learning_rate": 8.133148404993066e-06, "loss": 0.1281, "step": 2702 }, { "epoch": 1.8725320401801178, "grad_norm": 0.8315407633781433, "learning_rate": 8.13245492371706e-06, "loss": 0.0787, "step": 2703 }, { "epoch": 1.8732248008313128, "grad_norm": 0.6737753748893738, "learning_rate": 8.131761442441055e-06, "loss": 0.0684, "step": 2704 }, { "epoch": 1.8739175614825077, "grad_norm": 0.9361903667449951, "learning_rate": 8.131067961165049e-06, "loss": 0.1022, "step": 2705 }, { "epoch": 1.874610322133703, "grad_norm": 0.8183360695838928, "learning_rate": 8.130374479889044e-06, "loss": 0.082, "step": 2706 }, { "epoch": 1.8753030827848978, "grad_norm": 0.8900784850120544, "learning_rate": 8.129680998613037e-06, "loss": 0.118, "step": 2707 }, { "epoch": 1.8759958434360928, "grad_norm": 0.8236899375915527, "learning_rate": 8.128987517337032e-06, "loss": 0.0755, "step": 2708 }, { "epoch": 1.8766886040872879, "grad_norm": 0.8137473464012146, "learning_rate": 8.128294036061027e-06, "loss": 0.1, "step": 2709 }, { "epoch": 1.8773813647384827, "grad_norm": 0.8047782778739929, "learning_rate": 8.127600554785022e-06, "loss": 0.076, "step": 2710 }, { "epoch": 1.878074125389678, "grad_norm": 0.8028963804244995, "learning_rate": 8.126907073509017e-06, "loss": 0.0753, "step": 2711 }, { "epoch": 1.8787668860408728, "grad_norm": 0.8761164546012878, "learning_rate": 8.12621359223301e-06, "loss": 0.092, "step": 2712 }, { "epoch": 1.8794596466920679, "grad_norm": 0.9060432314872742, "learning_rate": 8.125520110957005e-06, "loss": 0.101, "step": 2713 }, { "epoch": 1.880152407343263, "grad_norm": 0.8995921611785889, "learning_rate": 8.124826629681e-06, "loss": 0.1149, "step": 2714 }, { "epoch": 1.8808451679944578, "grad_norm": 0.7580709457397461, "learning_rate": 8.124133148404993e-06, "loss": 0.1024, "step": 2715 }, { "epoch": 1.881537928645653, "grad_norm": 0.8204407095909119, "learning_rate": 8.123439667128988e-06, "loss": 0.0936, "step": 2716 }, { "epoch": 1.882230689296848, "grad_norm": 0.7477433681488037, "learning_rate": 8.122746185852983e-06, "loss": 0.0801, "step": 2717 }, { "epoch": 1.882923449948043, "grad_norm": 0.7480289936065674, "learning_rate": 8.122052704576978e-06, "loss": 0.079, "step": 2718 }, { "epoch": 1.883616210599238, "grad_norm": 0.8686448931694031, "learning_rate": 8.121359223300973e-06, "loss": 0.0977, "step": 2719 }, { "epoch": 1.8843089712504328, "grad_norm": 0.8237332701683044, "learning_rate": 8.120665742024966e-06, "loss": 0.0895, "step": 2720 }, { "epoch": 1.8850017319016281, "grad_norm": 0.9018293619155884, "learning_rate": 8.119972260748961e-06, "loss": 0.1073, "step": 2721 }, { "epoch": 1.885694492552823, "grad_norm": 0.8091353178024292, "learning_rate": 8.119278779472954e-06, "loss": 0.0947, "step": 2722 }, { "epoch": 1.886387253204018, "grad_norm": 0.7719805836677551, "learning_rate": 8.118585298196949e-06, "loss": 0.0858, "step": 2723 }, { "epoch": 1.887080013855213, "grad_norm": 0.8000248670578003, "learning_rate": 8.117891816920944e-06, "loss": 0.0843, "step": 2724 }, { "epoch": 1.887772774506408, "grad_norm": 0.737173855304718, "learning_rate": 8.117198335644937e-06, "loss": 0.0811, "step": 2725 }, { "epoch": 1.8884655351576032, "grad_norm": 0.9293944239616394, "learning_rate": 8.116504854368932e-06, "loss": 0.0915, "step": 2726 }, { "epoch": 1.889158295808798, "grad_norm": 0.8899128437042236, "learning_rate": 8.115811373092927e-06, "loss": 0.1179, "step": 2727 }, { "epoch": 1.889851056459993, "grad_norm": 1.142066478729248, "learning_rate": 8.115117891816922e-06, "loss": 0.1177, "step": 2728 }, { "epoch": 1.8905438171111881, "grad_norm": 0.7394709587097168, "learning_rate": 8.114424410540917e-06, "loss": 0.0812, "step": 2729 }, { "epoch": 1.891236577762383, "grad_norm": 0.7760245203971863, "learning_rate": 8.11373092926491e-06, "loss": 0.0899, "step": 2730 }, { "epoch": 1.8919293384135782, "grad_norm": 0.7233203053474426, "learning_rate": 8.113037447988905e-06, "loss": 0.0859, "step": 2731 }, { "epoch": 1.892622099064773, "grad_norm": 0.8594115376472473, "learning_rate": 8.112343966712898e-06, "loss": 0.0965, "step": 2732 }, { "epoch": 1.8933148597159681, "grad_norm": 0.8868102431297302, "learning_rate": 8.111650485436893e-06, "loss": 0.0973, "step": 2733 }, { "epoch": 1.8940076203671632, "grad_norm": 0.9329622387886047, "learning_rate": 8.110957004160888e-06, "loss": 0.1014, "step": 2734 }, { "epoch": 1.894700381018358, "grad_norm": 0.8387184143066406, "learning_rate": 8.110263522884883e-06, "loss": 0.0808, "step": 2735 }, { "epoch": 1.8953931416695533, "grad_norm": 0.7633192539215088, "learning_rate": 8.109570041608878e-06, "loss": 0.0951, "step": 2736 }, { "epoch": 1.8960859023207481, "grad_norm": 0.8906262516975403, "learning_rate": 8.108876560332871e-06, "loss": 0.0958, "step": 2737 }, { "epoch": 1.8967786629719432, "grad_norm": 0.8757254481315613, "learning_rate": 8.108183079056866e-06, "loss": 0.0963, "step": 2738 }, { "epoch": 1.8974714236231383, "grad_norm": 0.9309480786323547, "learning_rate": 8.107489597780861e-06, "loss": 0.1078, "step": 2739 }, { "epoch": 1.898164184274333, "grad_norm": 0.8245842456817627, "learning_rate": 8.106796116504854e-06, "loss": 0.1053, "step": 2740 }, { "epoch": 1.8988569449255284, "grad_norm": 0.657714307308197, "learning_rate": 8.10610263522885e-06, "loss": 0.062, "step": 2741 }, { "epoch": 1.8995497055767232, "grad_norm": 0.8045280575752258, "learning_rate": 8.105409153952843e-06, "loss": 0.0874, "step": 2742 }, { "epoch": 1.9002424662279183, "grad_norm": 0.728067934513092, "learning_rate": 8.104715672676838e-06, "loss": 0.0624, "step": 2743 }, { "epoch": 1.9009352268791133, "grad_norm": 0.9533117413520813, "learning_rate": 8.104022191400832e-06, "loss": 0.11, "step": 2744 }, { "epoch": 1.9016279875303082, "grad_norm": 0.7989661693572998, "learning_rate": 8.103328710124827e-06, "loss": 0.0838, "step": 2745 }, { "epoch": 1.9023207481815034, "grad_norm": 0.8091473579406738, "learning_rate": 8.102635228848822e-06, "loss": 0.0826, "step": 2746 }, { "epoch": 1.9030135088326983, "grad_norm": 0.9147849082946777, "learning_rate": 8.101941747572816e-06, "loss": 0.1009, "step": 2747 }, { "epoch": 1.9037062694838933, "grad_norm": 0.8592344522476196, "learning_rate": 8.10124826629681e-06, "loss": 0.0824, "step": 2748 }, { "epoch": 1.9043990301350884, "grad_norm": 0.8707457780838013, "learning_rate": 8.100554785020805e-06, "loss": 0.1079, "step": 2749 }, { "epoch": 1.9050917907862832, "grad_norm": 0.8529501557350159, "learning_rate": 8.099861303744799e-06, "loss": 0.0983, "step": 2750 }, { "epoch": 1.9057845514374785, "grad_norm": 0.842431366443634, "learning_rate": 8.099167822468794e-06, "loss": 0.1047, "step": 2751 }, { "epoch": 1.9064773120886733, "grad_norm": 0.8026554584503174, "learning_rate": 8.098474341192789e-06, "loss": 0.0753, "step": 2752 }, { "epoch": 1.9071700727398684, "grad_norm": 0.8910338878631592, "learning_rate": 8.097780859916783e-06, "loss": 0.1004, "step": 2753 }, { "epoch": 1.9078628333910634, "grad_norm": 0.6836819648742676, "learning_rate": 8.097087378640778e-06, "loss": 0.0693, "step": 2754 }, { "epoch": 1.9085555940422583, "grad_norm": 0.7922234535217285, "learning_rate": 8.096393897364772e-06, "loss": 0.0893, "step": 2755 }, { "epoch": 1.9092483546934536, "grad_norm": 0.799467921257019, "learning_rate": 8.095700416088767e-06, "loss": 0.0862, "step": 2756 }, { "epoch": 1.9099411153446484, "grad_norm": 0.9862185120582581, "learning_rate": 8.09500693481276e-06, "loss": 0.0984, "step": 2757 }, { "epoch": 1.9106338759958434, "grad_norm": 1.1314369440078735, "learning_rate": 8.094313453536755e-06, "loss": 0.1002, "step": 2758 }, { "epoch": 1.9113266366470385, "grad_norm": 0.8351694941520691, "learning_rate": 8.09361997226075e-06, "loss": 0.1064, "step": 2759 }, { "epoch": 1.9120193972982333, "grad_norm": 0.9159316420555115, "learning_rate": 8.092926490984745e-06, "loss": 0.0971, "step": 2760 }, { "epoch": 1.9127121579494286, "grad_norm": 0.8018559813499451, "learning_rate": 8.092233009708738e-06, "loss": 0.0847, "step": 2761 }, { "epoch": 1.9134049186006234, "grad_norm": 0.9117119312286377, "learning_rate": 8.091539528432733e-06, "loss": 0.1019, "step": 2762 }, { "epoch": 1.9140976792518185, "grad_norm": 0.7862089276313782, "learning_rate": 8.090846047156728e-06, "loss": 0.0838, "step": 2763 }, { "epoch": 1.9147904399030136, "grad_norm": 0.8904909491539001, "learning_rate": 8.090152565880723e-06, "loss": 0.1102, "step": 2764 }, { "epoch": 1.9154832005542084, "grad_norm": 0.7712727189064026, "learning_rate": 8.089459084604716e-06, "loss": 0.0716, "step": 2765 }, { "epoch": 1.9161759612054037, "grad_norm": 0.7882879376411438, "learning_rate": 8.08876560332871e-06, "loss": 0.092, "step": 2766 }, { "epoch": 1.9168687218565985, "grad_norm": 0.7251750230789185, "learning_rate": 8.088072122052704e-06, "loss": 0.0823, "step": 2767 }, { "epoch": 1.9175614825077936, "grad_norm": 0.8687427639961243, "learning_rate": 8.087378640776699e-06, "loss": 0.09, "step": 2768 }, { "epoch": 1.9182542431589886, "grad_norm": 0.7910054326057434, "learning_rate": 8.086685159500694e-06, "loss": 0.0961, "step": 2769 }, { "epoch": 1.9189470038101835, "grad_norm": 0.7606614232063293, "learning_rate": 8.085991678224689e-06, "loss": 0.0841, "step": 2770 }, { "epoch": 1.9196397644613787, "grad_norm": 0.9103227257728577, "learning_rate": 8.085298196948684e-06, "loss": 0.0889, "step": 2771 }, { "epoch": 1.9203325251125736, "grad_norm": 0.8335845470428467, "learning_rate": 8.084604715672677e-06, "loss": 0.0883, "step": 2772 }, { "epoch": 1.9210252857637686, "grad_norm": 0.8396230340003967, "learning_rate": 8.083911234396672e-06, "loss": 0.0909, "step": 2773 }, { "epoch": 1.9217180464149637, "grad_norm": 0.9027969241142273, "learning_rate": 8.083217753120667e-06, "loss": 0.1057, "step": 2774 }, { "epoch": 1.9224108070661585, "grad_norm": 0.8963081240653992, "learning_rate": 8.08252427184466e-06, "loss": 0.0933, "step": 2775 }, { "epoch": 1.9231035677173538, "grad_norm": 0.8285951614379883, "learning_rate": 8.081830790568655e-06, "loss": 0.0748, "step": 2776 }, { "epoch": 1.9237963283685486, "grad_norm": 0.959260880947113, "learning_rate": 8.08113730929265e-06, "loss": 0.0959, "step": 2777 }, { "epoch": 1.9244890890197437, "grad_norm": 0.7964215278625488, "learning_rate": 8.080443828016645e-06, "loss": 0.0846, "step": 2778 }, { "epoch": 1.9251818496709387, "grad_norm": 0.7677169442176819, "learning_rate": 8.07975034674064e-06, "loss": 0.0955, "step": 2779 }, { "epoch": 1.9258746103221336, "grad_norm": 0.892074704170227, "learning_rate": 8.079056865464633e-06, "loss": 0.0905, "step": 2780 }, { "epoch": 1.9265673709733289, "grad_norm": 0.884797215461731, "learning_rate": 8.078363384188628e-06, "loss": 0.1175, "step": 2781 }, { "epoch": 1.9272601316245237, "grad_norm": 0.9015825986862183, "learning_rate": 8.077669902912621e-06, "loss": 0.1072, "step": 2782 }, { "epoch": 1.9279528922757188, "grad_norm": 0.8209898471832275, "learning_rate": 8.076976421636616e-06, "loss": 0.0833, "step": 2783 }, { "epoch": 1.9286456529269138, "grad_norm": 0.9417157173156738, "learning_rate": 8.076282940360611e-06, "loss": 0.0949, "step": 2784 }, { "epoch": 1.9293384135781086, "grad_norm": 0.8368494510650635, "learning_rate": 8.075589459084604e-06, "loss": 0.1081, "step": 2785 }, { "epoch": 1.930031174229304, "grad_norm": 0.9572821855545044, "learning_rate": 8.0748959778086e-06, "loss": 0.0917, "step": 2786 }, { "epoch": 1.9307239348804988, "grad_norm": 0.8616113662719727, "learning_rate": 8.074202496532594e-06, "loss": 0.1063, "step": 2787 }, { "epoch": 1.9314166955316938, "grad_norm": 0.8733940124511719, "learning_rate": 8.07350901525659e-06, "loss": 0.087, "step": 2788 }, { "epoch": 1.9321094561828889, "grad_norm": 0.9132512807846069, "learning_rate": 8.072815533980584e-06, "loss": 0.0978, "step": 2789 }, { "epoch": 1.9328022168340837, "grad_norm": 0.8157616853713989, "learning_rate": 8.072122052704577e-06, "loss": 0.0748, "step": 2790 }, { "epoch": 1.933494977485279, "grad_norm": 0.7877839207649231, "learning_rate": 8.071428571428572e-06, "loss": 0.0913, "step": 2791 }, { "epoch": 1.9341877381364738, "grad_norm": 0.8950355052947998, "learning_rate": 8.070735090152566e-06, "loss": 0.1013, "step": 2792 }, { "epoch": 1.9348804987876689, "grad_norm": 0.8914209604263306, "learning_rate": 8.07004160887656e-06, "loss": 0.1147, "step": 2793 }, { "epoch": 1.935573259438864, "grad_norm": 0.7586885690689087, "learning_rate": 8.069348127600555e-06, "loss": 0.075, "step": 2794 }, { "epoch": 1.9362660200900588, "grad_norm": 0.9337842464447021, "learning_rate": 8.06865464632455e-06, "loss": 0.1157, "step": 2795 }, { "epoch": 1.936958780741254, "grad_norm": 0.7821459174156189, "learning_rate": 8.067961165048545e-06, "loss": 0.0995, "step": 2796 }, { "epoch": 1.9376515413924489, "grad_norm": 0.8888229131698608, "learning_rate": 8.067267683772539e-06, "loss": 0.1026, "step": 2797 }, { "epoch": 1.938344302043644, "grad_norm": 0.7771438360214233, "learning_rate": 8.066574202496533e-06, "loss": 0.0836, "step": 2798 }, { "epoch": 1.939037062694839, "grad_norm": 0.7105121612548828, "learning_rate": 8.065880721220528e-06, "loss": 0.0706, "step": 2799 }, { "epoch": 1.9397298233460338, "grad_norm": 1.0043034553527832, "learning_rate": 8.065187239944522e-06, "loss": 0.119, "step": 2800 }, { "epoch": 1.9404225839972291, "grad_norm": 0.8070681095123291, "learning_rate": 8.064493758668517e-06, "loss": 0.0992, "step": 2801 }, { "epoch": 1.941115344648424, "grad_norm": 0.86124587059021, "learning_rate": 8.06380027739251e-06, "loss": 0.0968, "step": 2802 }, { "epoch": 1.941808105299619, "grad_norm": 0.8489442467689514, "learning_rate": 8.063106796116505e-06, "loss": 0.1085, "step": 2803 }, { "epoch": 1.942500865950814, "grad_norm": 0.8105494379997253, "learning_rate": 8.0624133148405e-06, "loss": 0.0775, "step": 2804 }, { "epoch": 1.943193626602009, "grad_norm": 0.6798321604728699, "learning_rate": 8.061719833564495e-06, "loss": 0.0694, "step": 2805 }, { "epoch": 1.9438863872532042, "grad_norm": 0.8851845264434814, "learning_rate": 8.06102635228849e-06, "loss": 0.1133, "step": 2806 }, { "epoch": 1.944579147904399, "grad_norm": 0.8672387003898621, "learning_rate": 8.060332871012483e-06, "loss": 0.1009, "step": 2807 }, { "epoch": 1.945271908555594, "grad_norm": 0.8221784234046936, "learning_rate": 8.059639389736478e-06, "loss": 0.0868, "step": 2808 }, { "epoch": 1.9459646692067891, "grad_norm": 0.8535941243171692, "learning_rate": 8.058945908460473e-06, "loss": 0.0892, "step": 2809 }, { "epoch": 1.946657429857984, "grad_norm": 0.8289077877998352, "learning_rate": 8.058252427184466e-06, "loss": 0.0948, "step": 2810 }, { "epoch": 1.9473501905091792, "grad_norm": 0.794834315776825, "learning_rate": 8.05755894590846e-06, "loss": 0.0764, "step": 2811 }, { "epoch": 1.948042951160374, "grad_norm": 0.797599732875824, "learning_rate": 8.056865464632456e-06, "loss": 0.0855, "step": 2812 }, { "epoch": 1.9487357118115691, "grad_norm": 0.8061396479606628, "learning_rate": 8.05617198335645e-06, "loss": 0.0924, "step": 2813 }, { "epoch": 1.9494284724627642, "grad_norm": 0.7848390936851501, "learning_rate": 8.055478502080446e-06, "loss": 0.0924, "step": 2814 }, { "epoch": 1.950121233113959, "grad_norm": 0.982895016670227, "learning_rate": 8.054785020804439e-06, "loss": 0.1266, "step": 2815 }, { "epoch": 1.9508139937651543, "grad_norm": 0.8684736490249634, "learning_rate": 8.054091539528434e-06, "loss": 0.1049, "step": 2816 }, { "epoch": 1.9515067544163491, "grad_norm": 0.8869232535362244, "learning_rate": 8.053398058252427e-06, "loss": 0.1149, "step": 2817 }, { "epoch": 1.9521995150675442, "grad_norm": 0.8731580376625061, "learning_rate": 8.052704576976422e-06, "loss": 0.1045, "step": 2818 }, { "epoch": 1.9528922757187392, "grad_norm": 0.9235742688179016, "learning_rate": 8.052011095700417e-06, "loss": 0.1248, "step": 2819 }, { "epoch": 1.953585036369934, "grad_norm": 0.943651556968689, "learning_rate": 8.05131761442441e-06, "loss": 0.0933, "step": 2820 }, { "epoch": 1.9542777970211294, "grad_norm": 0.7307215929031372, "learning_rate": 8.050624133148405e-06, "loss": 0.0651, "step": 2821 }, { "epoch": 1.9549705576723242, "grad_norm": 0.7968959212303162, "learning_rate": 8.0499306518724e-06, "loss": 0.0993, "step": 2822 }, { "epoch": 1.9556633183235193, "grad_norm": 0.8350526094436646, "learning_rate": 8.049237170596395e-06, "loss": 0.08, "step": 2823 }, { "epoch": 1.9563560789747143, "grad_norm": 0.7916170358657837, "learning_rate": 8.04854368932039e-06, "loss": 0.0848, "step": 2824 }, { "epoch": 1.9570488396259091, "grad_norm": 0.7483957409858704, "learning_rate": 8.047850208044383e-06, "loss": 0.0716, "step": 2825 }, { "epoch": 1.9577416002771044, "grad_norm": 0.7722062468528748, "learning_rate": 8.047156726768378e-06, "loss": 0.0952, "step": 2826 }, { "epoch": 1.9584343609282993, "grad_norm": 0.8419215083122253, "learning_rate": 8.046463245492371e-06, "loss": 0.0846, "step": 2827 }, { "epoch": 1.9591271215794943, "grad_norm": 0.8864947557449341, "learning_rate": 8.045769764216366e-06, "loss": 0.0883, "step": 2828 }, { "epoch": 1.9598198822306894, "grad_norm": 0.865912914276123, "learning_rate": 8.045076282940361e-06, "loss": 0.0983, "step": 2829 }, { "epoch": 1.9605126428818842, "grad_norm": 0.8244338631629944, "learning_rate": 8.044382801664356e-06, "loss": 0.0973, "step": 2830 }, { "epoch": 1.9612054035330795, "grad_norm": 0.762330949306488, "learning_rate": 8.043689320388351e-06, "loss": 0.0802, "step": 2831 }, { "epoch": 1.9618981641842743, "grad_norm": 0.8400306105613708, "learning_rate": 8.042995839112344e-06, "loss": 0.0925, "step": 2832 }, { "epoch": 1.9625909248354694, "grad_norm": 0.8090659976005554, "learning_rate": 8.04230235783634e-06, "loss": 0.0959, "step": 2833 }, { "epoch": 1.9632836854866644, "grad_norm": 0.9807546734809875, "learning_rate": 8.041608876560334e-06, "loss": 0.11, "step": 2834 }, { "epoch": 1.9639764461378593, "grad_norm": 0.7383775115013123, "learning_rate": 8.040915395284327e-06, "loss": 0.0813, "step": 2835 }, { "epoch": 1.9646692067890545, "grad_norm": 1.0186407566070557, "learning_rate": 8.040221914008322e-06, "loss": 0.128, "step": 2836 }, { "epoch": 1.9653619674402494, "grad_norm": 0.7591642737388611, "learning_rate": 8.039528432732317e-06, "loss": 0.0876, "step": 2837 }, { "epoch": 1.9660547280914444, "grad_norm": 0.9109426140785217, "learning_rate": 8.038834951456312e-06, "loss": 0.0865, "step": 2838 }, { "epoch": 1.9667474887426395, "grad_norm": 0.9108000993728638, "learning_rate": 8.038141470180305e-06, "loss": 0.1074, "step": 2839 }, { "epoch": 1.9674402493938343, "grad_norm": 0.9961658120155334, "learning_rate": 8.0374479889043e-06, "loss": 0.1089, "step": 2840 }, { "epoch": 1.9681330100450296, "grad_norm": 1.0040456056594849, "learning_rate": 8.036754507628295e-06, "loss": 0.0915, "step": 2841 }, { "epoch": 1.9688257706962244, "grad_norm": 0.7719624042510986, "learning_rate": 8.036061026352289e-06, "loss": 0.0971, "step": 2842 }, { "epoch": 1.9695185313474195, "grad_norm": 0.8600178360939026, "learning_rate": 8.035367545076283e-06, "loss": 0.0887, "step": 2843 }, { "epoch": 1.9702112919986146, "grad_norm": 0.8480808138847351, "learning_rate": 8.034674063800278e-06, "loss": 0.0853, "step": 2844 }, { "epoch": 1.9709040526498094, "grad_norm": 0.765336275100708, "learning_rate": 8.033980582524272e-06, "loss": 0.0842, "step": 2845 }, { "epoch": 1.9715968133010047, "grad_norm": 0.895420491695404, "learning_rate": 8.033287101248267e-06, "loss": 0.0858, "step": 2846 }, { "epoch": 1.9722895739521995, "grad_norm": 0.9333667159080505, "learning_rate": 8.032593619972261e-06, "loss": 0.088, "step": 2847 }, { "epoch": 1.9729823346033946, "grad_norm": 0.815650463104248, "learning_rate": 8.031900138696256e-06, "loss": 0.0834, "step": 2848 }, { "epoch": 1.9736750952545896, "grad_norm": 0.8807889819145203, "learning_rate": 8.031206657420251e-06, "loss": 0.1003, "step": 2849 }, { "epoch": 1.9743678559057845, "grad_norm": 0.9676668643951416, "learning_rate": 8.030513176144245e-06, "loss": 0.1208, "step": 2850 }, { "epoch": 1.9750606165569795, "grad_norm": 0.8601505756378174, "learning_rate": 8.02981969486824e-06, "loss": 0.1093, "step": 2851 }, { "epoch": 1.9757533772081746, "grad_norm": 0.8118881583213806, "learning_rate": 8.029126213592233e-06, "loss": 0.0895, "step": 2852 }, { "epoch": 1.9764461378593696, "grad_norm": 0.8941214084625244, "learning_rate": 8.028432732316228e-06, "loss": 0.0917, "step": 2853 }, { "epoch": 1.9771388985105647, "grad_norm": 0.9647018909454346, "learning_rate": 8.027739251040223e-06, "loss": 0.1209, "step": 2854 }, { "epoch": 1.9778316591617595, "grad_norm": 0.9033962488174438, "learning_rate": 8.027045769764218e-06, "loss": 0.0974, "step": 2855 }, { "epoch": 1.9785244198129546, "grad_norm": 1.0619659423828125, "learning_rate": 8.026352288488212e-06, "loss": 0.1026, "step": 2856 }, { "epoch": 1.9792171804641496, "grad_norm": 0.7518752813339233, "learning_rate": 8.025658807212206e-06, "loss": 0.0845, "step": 2857 }, { "epoch": 1.9799099411153447, "grad_norm": 0.8235559463500977, "learning_rate": 8.0249653259362e-06, "loss": 0.0884, "step": 2858 }, { "epoch": 1.9806027017665397, "grad_norm": 0.8043603897094727, "learning_rate": 8.024271844660196e-06, "loss": 0.0921, "step": 2859 }, { "epoch": 1.9812954624177346, "grad_norm": 0.8355159759521484, "learning_rate": 8.023578363384189e-06, "loss": 0.0895, "step": 2860 }, { "epoch": 1.9819882230689296, "grad_norm": 0.9968959093093872, "learning_rate": 8.022884882108184e-06, "loss": 0.109, "step": 2861 }, { "epoch": 1.9826809837201247, "grad_norm": 0.8901378512382507, "learning_rate": 8.022191400832177e-06, "loss": 0.1086, "step": 2862 }, { "epoch": 1.9833737443713197, "grad_norm": 0.8336589336395264, "learning_rate": 8.021497919556172e-06, "loss": 0.0877, "step": 2863 }, { "epoch": 1.9840665050225148, "grad_norm": 0.8249452710151672, "learning_rate": 8.020804438280167e-06, "loss": 0.0773, "step": 2864 }, { "epoch": 1.9847592656737096, "grad_norm": 0.8235527276992798, "learning_rate": 8.020110957004162e-06, "loss": 0.0897, "step": 2865 }, { "epoch": 1.9854520263249047, "grad_norm": 0.9498945474624634, "learning_rate": 8.019417475728157e-06, "loss": 0.1136, "step": 2866 }, { "epoch": 1.9861447869760998, "grad_norm": 0.7912014722824097, "learning_rate": 8.01872399445215e-06, "loss": 0.0935, "step": 2867 }, { "epoch": 1.9868375476272948, "grad_norm": 0.7760205268859863, "learning_rate": 8.018030513176145e-06, "loss": 0.0817, "step": 2868 }, { "epoch": 1.9875303082784899, "grad_norm": 0.8328711986541748, "learning_rate": 8.01733703190014e-06, "loss": 0.0877, "step": 2869 }, { "epoch": 1.9882230689296847, "grad_norm": 0.8077870607376099, "learning_rate": 8.016643550624133e-06, "loss": 0.0796, "step": 2870 }, { "epoch": 1.9889158295808798, "grad_norm": 1.1094268560409546, "learning_rate": 8.015950069348128e-06, "loss": 0.0997, "step": 2871 }, { "epoch": 1.9896085902320748, "grad_norm": 0.8952378630638123, "learning_rate": 8.015256588072123e-06, "loss": 0.0955, "step": 2872 }, { "epoch": 1.9903013508832699, "grad_norm": 0.7212051153182983, "learning_rate": 8.014563106796118e-06, "loss": 0.0755, "step": 2873 }, { "epoch": 1.990994111534465, "grad_norm": 0.7911790609359741, "learning_rate": 8.013869625520113e-06, "loss": 0.1015, "step": 2874 }, { "epoch": 1.9916868721856598, "grad_norm": 0.807488203048706, "learning_rate": 8.013176144244106e-06, "loss": 0.1066, "step": 2875 }, { "epoch": 1.9923796328368548, "grad_norm": 0.8579085469245911, "learning_rate": 8.012482662968101e-06, "loss": 0.0918, "step": 2876 }, { "epoch": 1.9930723934880499, "grad_norm": 0.8495670557022095, "learning_rate": 8.011789181692094e-06, "loss": 0.0944, "step": 2877 }, { "epoch": 1.993765154139245, "grad_norm": 0.7934786081314087, "learning_rate": 8.01109570041609e-06, "loss": 0.0792, "step": 2878 }, { "epoch": 1.99445791479044, "grad_norm": 0.9561198353767395, "learning_rate": 8.010402219140084e-06, "loss": 0.1, "step": 2879 }, { "epoch": 1.9951506754416348, "grad_norm": 0.8843298554420471, "learning_rate": 8.009708737864077e-06, "loss": 0.1082, "step": 2880 }, { "epoch": 1.9958434360928299, "grad_norm": 0.9929296374320984, "learning_rate": 8.009015256588072e-06, "loss": 0.1353, "step": 2881 }, { "epoch": 1.996536196744025, "grad_norm": 0.9166862368583679, "learning_rate": 8.008321775312067e-06, "loss": 0.0744, "step": 2882 }, { "epoch": 1.99722895739522, "grad_norm": 0.7657050490379333, "learning_rate": 8.007628294036062e-06, "loss": 0.0863, "step": 2883 }, { "epoch": 1.997921718046415, "grad_norm": 1.0217808485031128, "learning_rate": 8.006934812760057e-06, "loss": 0.1173, "step": 2884 }, { "epoch": 1.9986144786976099, "grad_norm": 1.1051387786865234, "learning_rate": 8.00624133148405e-06, "loss": 0.0966, "step": 2885 }, { "epoch": 1.999307239348805, "grad_norm": 0.908137321472168, "learning_rate": 8.005547850208045e-06, "loss": 0.1077, "step": 2886 }, { "epoch": 2.0, "grad_norm": 1.0401666164398193, "learning_rate": 8.004854368932038e-06, "loss": 0.105, "step": 2887 }, { "epoch": 2.0, "eval_loss": 0.2396479994058609, "eval_runtime": 7675.0388, "eval_samples_per_second": 1.042, "eval_steps_per_second": 0.033, "eval_wer": 13.347775953038191, "step": 2887 }, { "epoch": 2.000692760651195, "grad_norm": 0.5223402976989746, "learning_rate": 8.004160887656033e-06, "loss": 0.0599, "step": 2888 }, { "epoch": 2.00138552130239, "grad_norm": 0.557817816734314, "learning_rate": 8.003467406380028e-06, "loss": 0.0408, "step": 2889 }, { "epoch": 2.002078281953585, "grad_norm": 0.5676665902137756, "learning_rate": 8.002773925104023e-06, "loss": 0.0476, "step": 2890 }, { "epoch": 2.0027710426047802, "grad_norm": 0.6631727814674377, "learning_rate": 8.002080443828018e-06, "loss": 0.0633, "step": 2891 }, { "epoch": 2.003463803255975, "grad_norm": 0.6056278347969055, "learning_rate": 8.001386962552011e-06, "loss": 0.0596, "step": 2892 }, { "epoch": 2.00415656390717, "grad_norm": 0.573501706123352, "learning_rate": 8.000693481276006e-06, "loss": 0.0525, "step": 2893 }, { "epoch": 2.004849324558365, "grad_norm": 0.6473743319511414, "learning_rate": 8.000000000000001e-06, "loss": 0.0541, "step": 2894 }, { "epoch": 2.00554208520956, "grad_norm": 0.6857211589813232, "learning_rate": 7.999306518723995e-06, "loss": 0.0602, "step": 2895 }, { "epoch": 2.0062348458607553, "grad_norm": 0.7229959964752197, "learning_rate": 7.99861303744799e-06, "loss": 0.066, "step": 2896 }, { "epoch": 2.00692760651195, "grad_norm": 0.5958266854286194, "learning_rate": 7.997919556171983e-06, "loss": 0.0621, "step": 2897 }, { "epoch": 2.007620367163145, "grad_norm": 0.6253556609153748, "learning_rate": 7.997226074895978e-06, "loss": 0.0554, "step": 2898 }, { "epoch": 2.0083131278143402, "grad_norm": 0.6457939147949219, "learning_rate": 7.996532593619973e-06, "loss": 0.0652, "step": 2899 }, { "epoch": 2.009005888465535, "grad_norm": 0.7323454022407532, "learning_rate": 7.995839112343968e-06, "loss": 0.0561, "step": 2900 }, { "epoch": 2.0096986491167304, "grad_norm": 0.6677737236022949, "learning_rate": 7.995145631067962e-06, "loss": 0.0748, "step": 2901 }, { "epoch": 2.010391409767925, "grad_norm": 0.7082302570343018, "learning_rate": 7.994452149791956e-06, "loss": 0.0503, "step": 2902 }, { "epoch": 2.01108417041912, "grad_norm": 0.6542059183120728, "learning_rate": 7.99375866851595e-06, "loss": 0.0498, "step": 2903 }, { "epoch": 2.0117769310703153, "grad_norm": 0.6658546328544617, "learning_rate": 7.993065187239946e-06, "loss": 0.0586, "step": 2904 }, { "epoch": 2.01246969172151, "grad_norm": 0.8179011940956116, "learning_rate": 7.992371705963939e-06, "loss": 0.06, "step": 2905 }, { "epoch": 2.0131624523727054, "grad_norm": 0.6736352443695068, "learning_rate": 7.991678224687934e-06, "loss": 0.0544, "step": 2906 }, { "epoch": 2.0138552130239002, "grad_norm": 0.6913522481918335, "learning_rate": 7.990984743411929e-06, "loss": 0.0524, "step": 2907 }, { "epoch": 2.014547973675095, "grad_norm": 0.7478219270706177, "learning_rate": 7.990291262135924e-06, "loss": 0.0601, "step": 2908 }, { "epoch": 2.0152407343262904, "grad_norm": 0.7340624332427979, "learning_rate": 7.989597780859919e-06, "loss": 0.0632, "step": 2909 }, { "epoch": 2.015933494977485, "grad_norm": 0.6508964896202087, "learning_rate": 7.988904299583912e-06, "loss": 0.0575, "step": 2910 }, { "epoch": 2.0166262556286805, "grad_norm": 0.6782339811325073, "learning_rate": 7.988210818307907e-06, "loss": 0.0526, "step": 2911 }, { "epoch": 2.0173190162798753, "grad_norm": 0.6231827735900879, "learning_rate": 7.9875173370319e-06, "loss": 0.0515, "step": 2912 }, { "epoch": 2.01801177693107, "grad_norm": 0.6707166433334351, "learning_rate": 7.986823855755895e-06, "loss": 0.0553, "step": 2913 }, { "epoch": 2.0187045375822654, "grad_norm": 0.6236500144004822, "learning_rate": 7.98613037447989e-06, "loss": 0.0512, "step": 2914 }, { "epoch": 2.0193972982334603, "grad_norm": 0.7530456781387329, "learning_rate": 7.985436893203885e-06, "loss": 0.0567, "step": 2915 }, { "epoch": 2.0200900588846555, "grad_norm": 0.7120798230171204, "learning_rate": 7.984743411927878e-06, "loss": 0.0667, "step": 2916 }, { "epoch": 2.0207828195358504, "grad_norm": 0.6329599618911743, "learning_rate": 7.984049930651873e-06, "loss": 0.0603, "step": 2917 }, { "epoch": 2.021475580187045, "grad_norm": 0.6339494585990906, "learning_rate": 7.983356449375868e-06, "loss": 0.0584, "step": 2918 }, { "epoch": 2.0221683408382405, "grad_norm": 0.6025832891464233, "learning_rate": 7.982662968099863e-06, "loss": 0.0537, "step": 2919 }, { "epoch": 2.0228611014894353, "grad_norm": 0.6312150359153748, "learning_rate": 7.981969486823856e-06, "loss": 0.0551, "step": 2920 }, { "epoch": 2.0235538621406306, "grad_norm": 0.5520080327987671, "learning_rate": 7.981276005547851e-06, "loss": 0.0366, "step": 2921 }, { "epoch": 2.0242466227918254, "grad_norm": 0.648896336555481, "learning_rate": 7.980582524271844e-06, "loss": 0.0456, "step": 2922 }, { "epoch": 2.0249393834430203, "grad_norm": 0.6456888914108276, "learning_rate": 7.97988904299584e-06, "loss": 0.0576, "step": 2923 }, { "epoch": 2.0256321440942155, "grad_norm": 0.6049957871437073, "learning_rate": 7.979195561719834e-06, "loss": 0.0524, "step": 2924 }, { "epoch": 2.0263249047454104, "grad_norm": 0.6657125353813171, "learning_rate": 7.978502080443829e-06, "loss": 0.0623, "step": 2925 }, { "epoch": 2.0270176653966057, "grad_norm": 0.6422529816627502, "learning_rate": 7.977808599167824e-06, "loss": 0.0627, "step": 2926 }, { "epoch": 2.0277104260478005, "grad_norm": 0.5862202048301697, "learning_rate": 7.977115117891817e-06, "loss": 0.0368, "step": 2927 }, { "epoch": 2.0284031866989953, "grad_norm": 0.6602012515068054, "learning_rate": 7.976421636615812e-06, "loss": 0.0663, "step": 2928 }, { "epoch": 2.0290959473501906, "grad_norm": 0.6749635338783264, "learning_rate": 7.975728155339807e-06, "loss": 0.0566, "step": 2929 }, { "epoch": 2.0297887080013854, "grad_norm": 0.6463996767997742, "learning_rate": 7.9750346740638e-06, "loss": 0.0528, "step": 2930 }, { "epoch": 2.0304814686525807, "grad_norm": 0.6901179552078247, "learning_rate": 7.974341192787795e-06, "loss": 0.0525, "step": 2931 }, { "epoch": 2.0311742293037756, "grad_norm": 0.6191695332527161, "learning_rate": 7.97364771151179e-06, "loss": 0.0449, "step": 2932 }, { "epoch": 2.0318669899549704, "grad_norm": 0.6079990267753601, "learning_rate": 7.972954230235785e-06, "loss": 0.049, "step": 2933 }, { "epoch": 2.0325597506061657, "grad_norm": 0.7164596319198608, "learning_rate": 7.97226074895978e-06, "loss": 0.0598, "step": 2934 }, { "epoch": 2.0332525112573605, "grad_norm": 0.6479280591011047, "learning_rate": 7.971567267683773e-06, "loss": 0.0536, "step": 2935 }, { "epoch": 2.033945271908556, "grad_norm": 0.7407905459403992, "learning_rate": 7.970873786407768e-06, "loss": 0.0657, "step": 2936 }, { "epoch": 2.0346380325597506, "grad_norm": 0.6542361378669739, "learning_rate": 7.970180305131761e-06, "loss": 0.0552, "step": 2937 }, { "epoch": 2.0353307932109455, "grad_norm": 0.6269006729125977, "learning_rate": 7.969486823855756e-06, "loss": 0.0668, "step": 2938 }, { "epoch": 2.0360235538621407, "grad_norm": 0.6172609329223633, "learning_rate": 7.968793342579751e-06, "loss": 0.0465, "step": 2939 }, { "epoch": 2.0367163145133356, "grad_norm": 0.7666977643966675, "learning_rate": 7.968099861303745e-06, "loss": 0.0518, "step": 2940 }, { "epoch": 2.037409075164531, "grad_norm": 0.6957048177719116, "learning_rate": 7.96740638002774e-06, "loss": 0.0612, "step": 2941 }, { "epoch": 2.0381018358157257, "grad_norm": 0.5479961633682251, "learning_rate": 7.966712898751734e-06, "loss": 0.039, "step": 2942 }, { "epoch": 2.0387945964669205, "grad_norm": 0.7017296552658081, "learning_rate": 7.96601941747573e-06, "loss": 0.0548, "step": 2943 }, { "epoch": 2.039487357118116, "grad_norm": 0.6820141077041626, "learning_rate": 7.965325936199724e-06, "loss": 0.054, "step": 2944 }, { "epoch": 2.0401801177693106, "grad_norm": 0.7078639268875122, "learning_rate": 7.964632454923718e-06, "loss": 0.064, "step": 2945 }, { "epoch": 2.040872878420506, "grad_norm": 0.6844906210899353, "learning_rate": 7.963938973647712e-06, "loss": 0.0557, "step": 2946 }, { "epoch": 2.0415656390717007, "grad_norm": 0.75017911195755, "learning_rate": 7.963245492371706e-06, "loss": 0.0641, "step": 2947 }, { "epoch": 2.0422583997228956, "grad_norm": 0.6246193647384644, "learning_rate": 7.9625520110957e-06, "loss": 0.0493, "step": 2948 }, { "epoch": 2.042951160374091, "grad_norm": 0.6279093027114868, "learning_rate": 7.961858529819696e-06, "loss": 0.0502, "step": 2949 }, { "epoch": 2.0436439210252857, "grad_norm": 0.6237124800682068, "learning_rate": 7.96116504854369e-06, "loss": 0.0503, "step": 2950 }, { "epoch": 2.044336681676481, "grad_norm": 0.5340720415115356, "learning_rate": 7.960471567267685e-06, "loss": 0.0405, "step": 2951 }, { "epoch": 2.045029442327676, "grad_norm": 0.565596342086792, "learning_rate": 7.959778085991679e-06, "loss": 0.0513, "step": 2952 }, { "epoch": 2.0457222029788706, "grad_norm": 0.6254692077636719, "learning_rate": 7.959084604715674e-06, "loss": 0.0516, "step": 2953 }, { "epoch": 2.046414963630066, "grad_norm": 0.5610790848731995, "learning_rate": 7.958391123439669e-06, "loss": 0.0503, "step": 2954 }, { "epoch": 2.0471077242812608, "grad_norm": 0.7071074843406677, "learning_rate": 7.957697642163662e-06, "loss": 0.0609, "step": 2955 }, { "epoch": 2.047800484932456, "grad_norm": 0.6902168989181519, "learning_rate": 7.957004160887657e-06, "loss": 0.0543, "step": 2956 }, { "epoch": 2.048493245583651, "grad_norm": 0.6811742186546326, "learning_rate": 7.95631067961165e-06, "loss": 0.0508, "step": 2957 }, { "epoch": 2.0491860062348457, "grad_norm": 0.5946432948112488, "learning_rate": 7.955617198335645e-06, "loss": 0.0415, "step": 2958 }, { "epoch": 2.049878766886041, "grad_norm": 0.7139899134635925, "learning_rate": 7.95492371705964e-06, "loss": 0.0523, "step": 2959 }, { "epoch": 2.050571527537236, "grad_norm": 0.5924705266952515, "learning_rate": 7.954230235783635e-06, "loss": 0.054, "step": 2960 }, { "epoch": 2.051264288188431, "grad_norm": 0.6712675094604492, "learning_rate": 7.95353675450763e-06, "loss": 0.0535, "step": 2961 }, { "epoch": 2.051957048839626, "grad_norm": 0.6940580010414124, "learning_rate": 7.952843273231623e-06, "loss": 0.0554, "step": 2962 }, { "epoch": 2.0526498094908208, "grad_norm": 0.6344907879829407, "learning_rate": 7.952149791955618e-06, "loss": 0.043, "step": 2963 }, { "epoch": 2.053342570142016, "grad_norm": 0.6311092376708984, "learning_rate": 7.951456310679613e-06, "loss": 0.0504, "step": 2964 }, { "epoch": 2.054035330793211, "grad_norm": 0.6497060656547546, "learning_rate": 7.950762829403606e-06, "loss": 0.0402, "step": 2965 }, { "epoch": 2.054728091444406, "grad_norm": 0.5674020051956177, "learning_rate": 7.950069348127601e-06, "loss": 0.0505, "step": 2966 }, { "epoch": 2.055420852095601, "grad_norm": 0.5772608518600464, "learning_rate": 7.949375866851596e-06, "loss": 0.0501, "step": 2967 }, { "epoch": 2.056113612746796, "grad_norm": 0.7331957221031189, "learning_rate": 7.94868238557559e-06, "loss": 0.0426, "step": 2968 }, { "epoch": 2.056806373397991, "grad_norm": 0.6528723835945129, "learning_rate": 7.947988904299586e-06, "loss": 0.0516, "step": 2969 }, { "epoch": 2.057499134049186, "grad_norm": 0.5991925597190857, "learning_rate": 7.947295423023579e-06, "loss": 0.0489, "step": 2970 }, { "epoch": 2.058191894700381, "grad_norm": 0.539746105670929, "learning_rate": 7.946601941747574e-06, "loss": 0.0419, "step": 2971 }, { "epoch": 2.058884655351576, "grad_norm": 0.7843754887580872, "learning_rate": 7.945908460471567e-06, "loss": 0.0545, "step": 2972 }, { "epoch": 2.059577416002771, "grad_norm": 0.7631123065948486, "learning_rate": 7.945214979195562e-06, "loss": 0.0374, "step": 2973 }, { "epoch": 2.060270176653966, "grad_norm": 0.7931402325630188, "learning_rate": 7.944521497919557e-06, "loss": 0.0609, "step": 2974 }, { "epoch": 2.060962937305161, "grad_norm": 0.5307839512825012, "learning_rate": 7.94382801664355e-06, "loss": 0.0422, "step": 2975 }, { "epoch": 2.0616556979563563, "grad_norm": 0.6274649500846863, "learning_rate": 7.943134535367545e-06, "loss": 0.05, "step": 2976 }, { "epoch": 2.062348458607551, "grad_norm": 0.6522165536880493, "learning_rate": 7.94244105409154e-06, "loss": 0.0474, "step": 2977 }, { "epoch": 2.063041219258746, "grad_norm": 0.677781343460083, "learning_rate": 7.941747572815535e-06, "loss": 0.0453, "step": 2978 }, { "epoch": 2.0637339799099412, "grad_norm": 0.6549713015556335, "learning_rate": 7.94105409153953e-06, "loss": 0.0605, "step": 2979 }, { "epoch": 2.064426740561136, "grad_norm": 0.6024202108383179, "learning_rate": 7.940360610263523e-06, "loss": 0.0567, "step": 2980 }, { "epoch": 2.0651195012123313, "grad_norm": 0.6366188526153564, "learning_rate": 7.939667128987518e-06, "loss": 0.0496, "step": 2981 }, { "epoch": 2.065812261863526, "grad_norm": 0.7096099853515625, "learning_rate": 7.938973647711511e-06, "loss": 0.062, "step": 2982 }, { "epoch": 2.066505022514721, "grad_norm": 0.7041839957237244, "learning_rate": 7.938280166435506e-06, "loss": 0.0547, "step": 2983 }, { "epoch": 2.0671977831659163, "grad_norm": 0.6971117258071899, "learning_rate": 7.937586685159501e-06, "loss": 0.0442, "step": 2984 }, { "epoch": 2.067890543817111, "grad_norm": 0.6415303349494934, "learning_rate": 7.936893203883496e-06, "loss": 0.0538, "step": 2985 }, { "epoch": 2.0685833044683064, "grad_norm": 0.6057801246643066, "learning_rate": 7.936199722607491e-06, "loss": 0.0425, "step": 2986 }, { "epoch": 2.0692760651195012, "grad_norm": 0.7372746467590332, "learning_rate": 7.935506241331484e-06, "loss": 0.0514, "step": 2987 }, { "epoch": 2.069968825770696, "grad_norm": 0.6791513562202454, "learning_rate": 7.93481276005548e-06, "loss": 0.0531, "step": 2988 }, { "epoch": 2.0706615864218914, "grad_norm": 0.6759649515151978, "learning_rate": 7.934119278779474e-06, "loss": 0.0537, "step": 2989 }, { "epoch": 2.071354347073086, "grad_norm": 0.64107346534729, "learning_rate": 7.933425797503468e-06, "loss": 0.0504, "step": 2990 }, { "epoch": 2.0720471077242815, "grad_norm": 0.6264382600784302, "learning_rate": 7.932732316227462e-06, "loss": 0.0472, "step": 2991 }, { "epoch": 2.0727398683754763, "grad_norm": 0.6337283253669739, "learning_rate": 7.932038834951457e-06, "loss": 0.0542, "step": 2992 }, { "epoch": 2.073432629026671, "grad_norm": 0.7504245638847351, "learning_rate": 7.93134535367545e-06, "loss": 0.0552, "step": 2993 }, { "epoch": 2.0741253896778664, "grad_norm": 0.8058987855911255, "learning_rate": 7.930651872399446e-06, "loss": 0.07, "step": 2994 }, { "epoch": 2.0748181503290613, "grad_norm": 0.6072273254394531, "learning_rate": 7.92995839112344e-06, "loss": 0.0563, "step": 2995 }, { "epoch": 2.0755109109802565, "grad_norm": 0.6442796587944031, "learning_rate": 7.929264909847435e-06, "loss": 0.0563, "step": 2996 }, { "epoch": 2.0762036716314514, "grad_norm": 0.6914668083190918, "learning_rate": 7.928571428571429e-06, "loss": 0.0561, "step": 2997 }, { "epoch": 2.076896432282646, "grad_norm": 0.6183263659477234, "learning_rate": 7.927877947295424e-06, "loss": 0.0436, "step": 2998 }, { "epoch": 2.0775891929338415, "grad_norm": 0.5945209264755249, "learning_rate": 7.927184466019419e-06, "loss": 0.0491, "step": 2999 }, { "epoch": 2.0782819535850363, "grad_norm": 0.7157323956489563, "learning_rate": 7.926490984743412e-06, "loss": 0.0639, "step": 3000 }, { "epoch": 2.0789747142362316, "grad_norm": 0.7220014929771423, "learning_rate": 7.925797503467407e-06, "loss": 0.0724, "step": 3001 }, { "epoch": 2.0796674748874264, "grad_norm": 0.6445550918579102, "learning_rate": 7.925104022191402e-06, "loss": 0.0525, "step": 3002 }, { "epoch": 2.0803602355386213, "grad_norm": 0.6667922139167786, "learning_rate": 7.924410540915397e-06, "loss": 0.0553, "step": 3003 }, { "epoch": 2.0810529961898165, "grad_norm": 0.7367938160896301, "learning_rate": 7.923717059639391e-06, "loss": 0.0459, "step": 3004 }, { "epoch": 2.0817457568410114, "grad_norm": 0.5869571566581726, "learning_rate": 7.923023578363385e-06, "loss": 0.0465, "step": 3005 }, { "epoch": 2.0824385174922067, "grad_norm": 0.6422292590141296, "learning_rate": 7.92233009708738e-06, "loss": 0.0455, "step": 3006 }, { "epoch": 2.0831312781434015, "grad_norm": 0.7092993855476379, "learning_rate": 7.921636615811373e-06, "loss": 0.0544, "step": 3007 }, { "epoch": 2.0838240387945963, "grad_norm": 0.6405553817749023, "learning_rate": 7.920943134535368e-06, "loss": 0.0619, "step": 3008 }, { "epoch": 2.0845167994457916, "grad_norm": 0.7614275217056274, "learning_rate": 7.920249653259363e-06, "loss": 0.0612, "step": 3009 }, { "epoch": 2.0852095600969864, "grad_norm": 0.6826539635658264, "learning_rate": 7.919556171983358e-06, "loss": 0.0641, "step": 3010 }, { "epoch": 2.0859023207481817, "grad_norm": 0.7215129137039185, "learning_rate": 7.918862690707353e-06, "loss": 0.0538, "step": 3011 }, { "epoch": 2.0865950813993766, "grad_norm": 0.5781185030937195, "learning_rate": 7.918169209431346e-06, "loss": 0.0503, "step": 3012 }, { "epoch": 2.0872878420505714, "grad_norm": 0.7786000967025757, "learning_rate": 7.91747572815534e-06, "loss": 0.0675, "step": 3013 }, { "epoch": 2.0879806027017667, "grad_norm": 0.6735415458679199, "learning_rate": 7.916782246879336e-06, "loss": 0.0517, "step": 3014 }, { "epoch": 2.0886733633529615, "grad_norm": 0.6100610494613647, "learning_rate": 7.916088765603329e-06, "loss": 0.055, "step": 3015 }, { "epoch": 2.0893661240041568, "grad_norm": 0.6115071773529053, "learning_rate": 7.915395284327324e-06, "loss": 0.0486, "step": 3016 }, { "epoch": 2.0900588846553516, "grad_norm": 0.6331568360328674, "learning_rate": 7.914701803051317e-06, "loss": 0.0397, "step": 3017 }, { "epoch": 2.0907516453065464, "grad_norm": 0.6336967945098877, "learning_rate": 7.914008321775312e-06, "loss": 0.0497, "step": 3018 }, { "epoch": 2.0914444059577417, "grad_norm": 0.7088524699211121, "learning_rate": 7.913314840499307e-06, "loss": 0.0571, "step": 3019 }, { "epoch": 2.0921371666089366, "grad_norm": 0.8578203320503235, "learning_rate": 7.912621359223302e-06, "loss": 0.0577, "step": 3020 }, { "epoch": 2.092829927260132, "grad_norm": 0.665698230266571, "learning_rate": 7.911927877947297e-06, "loss": 0.0519, "step": 3021 }, { "epoch": 2.0935226879113267, "grad_norm": 0.625718355178833, "learning_rate": 7.91123439667129e-06, "loss": 0.0571, "step": 3022 }, { "epoch": 2.0942154485625215, "grad_norm": 0.6121425032615662, "learning_rate": 7.910540915395285e-06, "loss": 0.0483, "step": 3023 }, { "epoch": 2.094908209213717, "grad_norm": 0.6906800866127014, "learning_rate": 7.90984743411928e-06, "loss": 0.0535, "step": 3024 }, { "epoch": 2.0956009698649116, "grad_norm": 0.688827395439148, "learning_rate": 7.909153952843273e-06, "loss": 0.0678, "step": 3025 }, { "epoch": 2.0962937305161065, "grad_norm": 0.6596013903617859, "learning_rate": 7.908460471567268e-06, "loss": 0.0537, "step": 3026 }, { "epoch": 2.0969864911673017, "grad_norm": 0.6849279999732971, "learning_rate": 7.907766990291263e-06, "loss": 0.0545, "step": 3027 }, { "epoch": 2.0976792518184966, "grad_norm": 0.5821280479431152, "learning_rate": 7.907073509015258e-06, "loss": 0.0462, "step": 3028 }, { "epoch": 2.098372012469692, "grad_norm": 0.6135004162788391, "learning_rate": 7.906380027739253e-06, "loss": 0.0525, "step": 3029 }, { "epoch": 2.0990647731208867, "grad_norm": 0.5608628988265991, "learning_rate": 7.905686546463246e-06, "loss": 0.0425, "step": 3030 }, { "epoch": 2.099757533772082, "grad_norm": 0.9066096544265747, "learning_rate": 7.904993065187241e-06, "loss": 0.0461, "step": 3031 }, { "epoch": 2.100450294423277, "grad_norm": 0.6573208570480347, "learning_rate": 7.904299583911234e-06, "loss": 0.0557, "step": 3032 }, { "epoch": 2.1011430550744716, "grad_norm": 0.5651605725288391, "learning_rate": 7.90360610263523e-06, "loss": 0.0431, "step": 3033 }, { "epoch": 2.101835815725667, "grad_norm": 0.547998309135437, "learning_rate": 7.902912621359224e-06, "loss": 0.0417, "step": 3034 }, { "epoch": 2.1025285763768617, "grad_norm": 0.5699440240859985, "learning_rate": 7.902219140083217e-06, "loss": 0.0514, "step": 3035 }, { "epoch": 2.1032213370280566, "grad_norm": 0.7615641951560974, "learning_rate": 7.901525658807212e-06, "loss": 0.0602, "step": 3036 }, { "epoch": 2.103914097679252, "grad_norm": 0.638684868812561, "learning_rate": 7.900832177531207e-06, "loss": 0.0525, "step": 3037 }, { "epoch": 2.1046068583304467, "grad_norm": 0.663013756275177, "learning_rate": 7.900138696255202e-06, "loss": 0.0334, "step": 3038 }, { "epoch": 2.105299618981642, "grad_norm": 0.7992621660232544, "learning_rate": 7.899445214979197e-06, "loss": 0.054, "step": 3039 }, { "epoch": 2.105992379632837, "grad_norm": 0.6039307713508606, "learning_rate": 7.89875173370319e-06, "loss": 0.0525, "step": 3040 }, { "epoch": 2.106685140284032, "grad_norm": 0.607890248298645, "learning_rate": 7.898058252427185e-06, "loss": 0.047, "step": 3041 }, { "epoch": 2.107377900935227, "grad_norm": 0.7209950089454651, "learning_rate": 7.897364771151179e-06, "loss": 0.0704, "step": 3042 }, { "epoch": 2.1080706615864218, "grad_norm": 0.6088247299194336, "learning_rate": 7.896671289875174e-06, "loss": 0.0398, "step": 3043 }, { "epoch": 2.108763422237617, "grad_norm": 0.6290914416313171, "learning_rate": 7.895977808599168e-06, "loss": 0.0493, "step": 3044 }, { "epoch": 2.109456182888812, "grad_norm": 0.61188143491745, "learning_rate": 7.895284327323163e-06, "loss": 0.0491, "step": 3045 }, { "epoch": 2.1101489435400067, "grad_norm": 0.6168179512023926, "learning_rate": 7.894590846047158e-06, "loss": 0.0423, "step": 3046 }, { "epoch": 2.110841704191202, "grad_norm": 0.7018704414367676, "learning_rate": 7.893897364771152e-06, "loss": 0.0543, "step": 3047 }, { "epoch": 2.111534464842397, "grad_norm": 0.5940234065055847, "learning_rate": 7.893203883495147e-06, "loss": 0.0445, "step": 3048 }, { "epoch": 2.112227225493592, "grad_norm": 0.6289170384407043, "learning_rate": 7.892510402219141e-06, "loss": 0.0479, "step": 3049 }, { "epoch": 2.112919986144787, "grad_norm": 0.7200828194618225, "learning_rate": 7.891816920943135e-06, "loss": 0.057, "step": 3050 }, { "epoch": 2.113612746795982, "grad_norm": 0.6054809093475342, "learning_rate": 7.89112343966713e-06, "loss": 0.0474, "step": 3051 }, { "epoch": 2.114305507447177, "grad_norm": 0.7207465171813965, "learning_rate": 7.890429958391123e-06, "loss": 0.0751, "step": 3052 }, { "epoch": 2.114998268098372, "grad_norm": 0.6288327574729919, "learning_rate": 7.889736477115118e-06, "loss": 0.0515, "step": 3053 }, { "epoch": 2.115691028749567, "grad_norm": 0.6983470916748047, "learning_rate": 7.889042995839113e-06, "loss": 0.0491, "step": 3054 }, { "epoch": 2.116383789400762, "grad_norm": 0.6435803771018982, "learning_rate": 7.888349514563108e-06, "loss": 0.0434, "step": 3055 }, { "epoch": 2.117076550051957, "grad_norm": 0.6716493368148804, "learning_rate": 7.887656033287103e-06, "loss": 0.0558, "step": 3056 }, { "epoch": 2.117769310703152, "grad_norm": 0.6133092641830444, "learning_rate": 7.886962552011096e-06, "loss": 0.0534, "step": 3057 }, { "epoch": 2.118462071354347, "grad_norm": 0.5793967247009277, "learning_rate": 7.88626907073509e-06, "loss": 0.0401, "step": 3058 }, { "epoch": 2.119154832005542, "grad_norm": 0.6967165470123291, "learning_rate": 7.885575589459086e-06, "loss": 0.0532, "step": 3059 }, { "epoch": 2.119847592656737, "grad_norm": 0.5157840251922607, "learning_rate": 7.884882108183079e-06, "loss": 0.0377, "step": 3060 }, { "epoch": 2.1205403533079323, "grad_norm": 0.6262407302856445, "learning_rate": 7.884188626907074e-06, "loss": 0.0458, "step": 3061 }, { "epoch": 2.121233113959127, "grad_norm": 0.6209738254547119, "learning_rate": 7.883495145631069e-06, "loss": 0.0511, "step": 3062 }, { "epoch": 2.121925874610322, "grad_norm": 0.6587721705436707, "learning_rate": 7.882801664355064e-06, "loss": 0.0579, "step": 3063 }, { "epoch": 2.1226186352615173, "grad_norm": 0.6426872611045837, "learning_rate": 7.882108183079059e-06, "loss": 0.0541, "step": 3064 }, { "epoch": 2.123311395912712, "grad_norm": 0.714944064617157, "learning_rate": 7.881414701803052e-06, "loss": 0.0582, "step": 3065 }, { "epoch": 2.124004156563907, "grad_norm": 0.6246589422225952, "learning_rate": 7.880721220527047e-06, "loss": 0.0563, "step": 3066 }, { "epoch": 2.1246969172151022, "grad_norm": 0.8801479339599609, "learning_rate": 7.88002773925104e-06, "loss": 0.0638, "step": 3067 }, { "epoch": 2.125389677866297, "grad_norm": 0.6280035376548767, "learning_rate": 7.879334257975035e-06, "loss": 0.0483, "step": 3068 }, { "epoch": 2.1260824385174923, "grad_norm": 0.670604407787323, "learning_rate": 7.87864077669903e-06, "loss": 0.0577, "step": 3069 }, { "epoch": 2.126775199168687, "grad_norm": 0.6534485220909119, "learning_rate": 7.877947295423023e-06, "loss": 0.0465, "step": 3070 }, { "epoch": 2.1274679598198825, "grad_norm": 0.7170853614807129, "learning_rate": 7.877253814147018e-06, "loss": 0.0411, "step": 3071 }, { "epoch": 2.1281607204710773, "grad_norm": 0.6874311566352844, "learning_rate": 7.876560332871013e-06, "loss": 0.0568, "step": 3072 }, { "epoch": 2.128853481122272, "grad_norm": 0.5714280605316162, "learning_rate": 7.875866851595008e-06, "loss": 0.0437, "step": 3073 }, { "epoch": 2.1295462417734674, "grad_norm": 0.5944762229919434, "learning_rate": 7.875173370319003e-06, "loss": 0.0467, "step": 3074 }, { "epoch": 2.1302390024246622, "grad_norm": 0.696190357208252, "learning_rate": 7.874479889042996e-06, "loss": 0.056, "step": 3075 }, { "epoch": 2.130931763075857, "grad_norm": 0.576362669467926, "learning_rate": 7.873786407766991e-06, "loss": 0.0495, "step": 3076 }, { "epoch": 2.1316245237270524, "grad_norm": 0.7446839213371277, "learning_rate": 7.873092926490984e-06, "loss": 0.0591, "step": 3077 }, { "epoch": 2.132317284378247, "grad_norm": 0.5943527221679688, "learning_rate": 7.87239944521498e-06, "loss": 0.0486, "step": 3078 }, { "epoch": 2.1330100450294425, "grad_norm": 0.5902846455574036, "learning_rate": 7.871705963938974e-06, "loss": 0.0446, "step": 3079 }, { "epoch": 2.1337028056806373, "grad_norm": 0.6252435445785522, "learning_rate": 7.87101248266297e-06, "loss": 0.0438, "step": 3080 }, { "epoch": 2.1343955663318326, "grad_norm": 0.6630060076713562, "learning_rate": 7.870319001386964e-06, "loss": 0.0614, "step": 3081 }, { "epoch": 2.1350883269830274, "grad_norm": 0.5988588929176331, "learning_rate": 7.869625520110957e-06, "loss": 0.0431, "step": 3082 }, { "epoch": 2.1357810876342223, "grad_norm": 0.8282397985458374, "learning_rate": 7.868932038834952e-06, "loss": 0.0673, "step": 3083 }, { "epoch": 2.1364738482854175, "grad_norm": 0.8640880584716797, "learning_rate": 7.868238557558947e-06, "loss": 0.0504, "step": 3084 }, { "epoch": 2.1371666089366124, "grad_norm": 0.6471401453018188, "learning_rate": 7.86754507628294e-06, "loss": 0.0553, "step": 3085 }, { "epoch": 2.137859369587807, "grad_norm": 0.6992062330245972, "learning_rate": 7.866851595006935e-06, "loss": 0.0531, "step": 3086 }, { "epoch": 2.1385521302390025, "grad_norm": 0.6501442790031433, "learning_rate": 7.86615811373093e-06, "loss": 0.0482, "step": 3087 }, { "epoch": 2.1392448908901973, "grad_norm": 0.6028953790664673, "learning_rate": 7.865464632454925e-06, "loss": 0.0434, "step": 3088 }, { "epoch": 2.1399376515413926, "grad_norm": 0.7869958281517029, "learning_rate": 7.864771151178918e-06, "loss": 0.0579, "step": 3089 }, { "epoch": 2.1406304121925874, "grad_norm": 0.8090862035751343, "learning_rate": 7.864077669902913e-06, "loss": 0.0658, "step": 3090 }, { "epoch": 2.1413231728437827, "grad_norm": 0.6419026851654053, "learning_rate": 7.863384188626908e-06, "loss": 0.0521, "step": 3091 }, { "epoch": 2.1420159334949775, "grad_norm": 0.6343168616294861, "learning_rate": 7.862690707350902e-06, "loss": 0.0484, "step": 3092 }, { "epoch": 2.1427086941461724, "grad_norm": 0.6734219789505005, "learning_rate": 7.861997226074897e-06, "loss": 0.0437, "step": 3093 }, { "epoch": 2.1434014547973677, "grad_norm": 0.7136476635932922, "learning_rate": 7.861303744798891e-06, "loss": 0.057, "step": 3094 }, { "epoch": 2.1440942154485625, "grad_norm": 0.8247693777084351, "learning_rate": 7.860610263522885e-06, "loss": 0.071, "step": 3095 }, { "epoch": 2.1447869760997573, "grad_norm": 0.6431103348731995, "learning_rate": 7.85991678224688e-06, "loss": 0.0611, "step": 3096 }, { "epoch": 2.1454797367509526, "grad_norm": 0.6054585576057434, "learning_rate": 7.859223300970875e-06, "loss": 0.047, "step": 3097 }, { "epoch": 2.1461724974021474, "grad_norm": 0.7017614245414734, "learning_rate": 7.85852981969487e-06, "loss": 0.059, "step": 3098 }, { "epoch": 2.1468652580533427, "grad_norm": 0.5154880881309509, "learning_rate": 7.857836338418864e-06, "loss": 0.0323, "step": 3099 }, { "epoch": 2.1475580187045376, "grad_norm": 0.6577040553092957, "learning_rate": 7.857142857142858e-06, "loss": 0.0544, "step": 3100 }, { "epoch": 2.1482507793557324, "grad_norm": 0.5519864559173584, "learning_rate": 7.856449375866853e-06, "loss": 0.0403, "step": 3101 }, { "epoch": 2.1489435400069277, "grad_norm": 0.6759754419326782, "learning_rate": 7.855755894590846e-06, "loss": 0.0597, "step": 3102 }, { "epoch": 2.1496363006581225, "grad_norm": 0.6048867106437683, "learning_rate": 7.85506241331484e-06, "loss": 0.0487, "step": 3103 }, { "epoch": 2.150329061309318, "grad_norm": 0.6813057065010071, "learning_rate": 7.854368932038836e-06, "loss": 0.0518, "step": 3104 }, { "epoch": 2.1510218219605126, "grad_norm": 0.7083479166030884, "learning_rate": 7.85367545076283e-06, "loss": 0.0405, "step": 3105 }, { "epoch": 2.1517145826117074, "grad_norm": 0.5903550982475281, "learning_rate": 7.852981969486826e-06, "loss": 0.0595, "step": 3106 }, { "epoch": 2.1524073432629027, "grad_norm": 0.700406551361084, "learning_rate": 7.852288488210819e-06, "loss": 0.056, "step": 3107 }, { "epoch": 2.1531001039140976, "grad_norm": 0.7421799302101135, "learning_rate": 7.851595006934814e-06, "loss": 0.0625, "step": 3108 }, { "epoch": 2.153792864565293, "grad_norm": 0.6779900193214417, "learning_rate": 7.850901525658809e-06, "loss": 0.0537, "step": 3109 }, { "epoch": 2.1544856252164877, "grad_norm": 0.7126687169075012, "learning_rate": 7.850208044382802e-06, "loss": 0.0538, "step": 3110 }, { "epoch": 2.1551783858676825, "grad_norm": 0.6870993971824646, "learning_rate": 7.849514563106797e-06, "loss": 0.0593, "step": 3111 }, { "epoch": 2.155871146518878, "grad_norm": 0.9907678365707397, "learning_rate": 7.84882108183079e-06, "loss": 0.0746, "step": 3112 }, { "epoch": 2.1565639071700726, "grad_norm": 0.5799802541732788, "learning_rate": 7.848127600554785e-06, "loss": 0.0422, "step": 3113 }, { "epoch": 2.157256667821268, "grad_norm": 0.7612532377243042, "learning_rate": 7.84743411927878e-06, "loss": 0.0542, "step": 3114 }, { "epoch": 2.1579494284724627, "grad_norm": 0.6533823013305664, "learning_rate": 7.846740638002775e-06, "loss": 0.0518, "step": 3115 }, { "epoch": 2.1586421891236576, "grad_norm": 0.7348969578742981, "learning_rate": 7.84604715672677e-06, "loss": 0.0606, "step": 3116 }, { "epoch": 2.159334949774853, "grad_norm": 0.6741594076156616, "learning_rate": 7.845353675450763e-06, "loss": 0.0544, "step": 3117 }, { "epoch": 2.1600277104260477, "grad_norm": 0.6544450521469116, "learning_rate": 7.844660194174758e-06, "loss": 0.0548, "step": 3118 }, { "epoch": 2.160720471077243, "grad_norm": 0.7590257525444031, "learning_rate": 7.843966712898753e-06, "loss": 0.051, "step": 3119 }, { "epoch": 2.161413231728438, "grad_norm": 0.7456057071685791, "learning_rate": 7.843273231622746e-06, "loss": 0.0483, "step": 3120 }, { "epoch": 2.1621059923796326, "grad_norm": 0.6765349507331848, "learning_rate": 7.842579750346741e-06, "loss": 0.0671, "step": 3121 }, { "epoch": 2.162798753030828, "grad_norm": 0.6293055415153503, "learning_rate": 7.841886269070736e-06, "loss": 0.0461, "step": 3122 }, { "epoch": 2.1634915136820227, "grad_norm": 0.627202570438385, "learning_rate": 7.841192787794731e-06, "loss": 0.0599, "step": 3123 }, { "epoch": 2.164184274333218, "grad_norm": 0.5979534387588501, "learning_rate": 7.840499306518726e-06, "loss": 0.047, "step": 3124 }, { "epoch": 2.164877034984413, "grad_norm": 0.6458120346069336, "learning_rate": 7.839805825242719e-06, "loss": 0.0471, "step": 3125 }, { "epoch": 2.1655697956356077, "grad_norm": 0.6617605090141296, "learning_rate": 7.839112343966714e-06, "loss": 0.0631, "step": 3126 }, { "epoch": 2.166262556286803, "grad_norm": 0.7290152907371521, "learning_rate": 7.838418862690707e-06, "loss": 0.0715, "step": 3127 }, { "epoch": 2.166955316937998, "grad_norm": 0.6518305540084839, "learning_rate": 7.837725381414702e-06, "loss": 0.056, "step": 3128 }, { "epoch": 2.167648077589193, "grad_norm": 0.6306636333465576, "learning_rate": 7.837031900138697e-06, "loss": 0.0495, "step": 3129 }, { "epoch": 2.168340838240388, "grad_norm": 0.6336146593093872, "learning_rate": 7.83633841886269e-06, "loss": 0.0545, "step": 3130 }, { "epoch": 2.1690335988915828, "grad_norm": 0.6405359506607056, "learning_rate": 7.835644937586685e-06, "loss": 0.0542, "step": 3131 }, { "epoch": 2.169726359542778, "grad_norm": 0.5968299508094788, "learning_rate": 7.83495145631068e-06, "loss": 0.0408, "step": 3132 }, { "epoch": 2.170419120193973, "grad_norm": 0.6285966634750366, "learning_rate": 7.834257975034675e-06, "loss": 0.0465, "step": 3133 }, { "epoch": 2.171111880845168, "grad_norm": 0.9761276841163635, "learning_rate": 7.83356449375867e-06, "loss": 0.0615, "step": 3134 }, { "epoch": 2.171804641496363, "grad_norm": 0.686771035194397, "learning_rate": 7.832871012482663e-06, "loss": 0.0439, "step": 3135 }, { "epoch": 2.172497402147558, "grad_norm": 0.6235953569412231, "learning_rate": 7.832177531206658e-06, "loss": 0.0471, "step": 3136 }, { "epoch": 2.173190162798753, "grad_norm": 0.695323646068573, "learning_rate": 7.831484049930652e-06, "loss": 0.0584, "step": 3137 }, { "epoch": 2.173882923449948, "grad_norm": 0.6751148104667664, "learning_rate": 7.830790568654646e-06, "loss": 0.0522, "step": 3138 }, { "epoch": 2.174575684101143, "grad_norm": 0.6266228556632996, "learning_rate": 7.830097087378641e-06, "loss": 0.0501, "step": 3139 }, { "epoch": 2.175268444752338, "grad_norm": 0.6762698888778687, "learning_rate": 7.829403606102636e-06, "loss": 0.0557, "step": 3140 }, { "epoch": 2.175961205403533, "grad_norm": 0.6563423275947571, "learning_rate": 7.828710124826631e-06, "loss": 0.0479, "step": 3141 }, { "epoch": 2.176653966054728, "grad_norm": 0.6651897430419922, "learning_rate": 7.828016643550625e-06, "loss": 0.055, "step": 3142 }, { "epoch": 2.177346726705923, "grad_norm": 0.6874797344207764, "learning_rate": 7.82732316227462e-06, "loss": 0.0541, "step": 3143 }, { "epoch": 2.1780394873571183, "grad_norm": 0.7515442371368408, "learning_rate": 7.826629680998614e-06, "loss": 0.0678, "step": 3144 }, { "epoch": 2.178732248008313, "grad_norm": 0.7127979397773743, "learning_rate": 7.825936199722608e-06, "loss": 0.0633, "step": 3145 }, { "epoch": 2.179425008659508, "grad_norm": 0.6155861020088196, "learning_rate": 7.825242718446603e-06, "loss": 0.0355, "step": 3146 }, { "epoch": 2.1801177693107032, "grad_norm": 0.6179302930831909, "learning_rate": 7.824549237170598e-06, "loss": 0.0474, "step": 3147 }, { "epoch": 2.180810529961898, "grad_norm": 0.6917999386787415, "learning_rate": 7.82385575589459e-06, "loss": 0.0581, "step": 3148 }, { "epoch": 2.1815032906130933, "grad_norm": 0.7743158340454102, "learning_rate": 7.823162274618586e-06, "loss": 0.0614, "step": 3149 }, { "epoch": 2.182196051264288, "grad_norm": 0.6892214417457581, "learning_rate": 7.82246879334258e-06, "loss": 0.0494, "step": 3150 }, { "epoch": 2.182888811915483, "grad_norm": 0.6922516226768494, "learning_rate": 7.821775312066576e-06, "loss": 0.063, "step": 3151 }, { "epoch": 2.1835815725666783, "grad_norm": 0.5864610075950623, "learning_rate": 7.821081830790569e-06, "loss": 0.0435, "step": 3152 }, { "epoch": 2.184274333217873, "grad_norm": 0.6471571922302246, "learning_rate": 7.820388349514564e-06, "loss": 0.0538, "step": 3153 }, { "epoch": 2.1849670938690684, "grad_norm": 0.707469642162323, "learning_rate": 7.819694868238559e-06, "loss": 0.0814, "step": 3154 }, { "epoch": 2.1856598545202632, "grad_norm": 0.656551718711853, "learning_rate": 7.819001386962552e-06, "loss": 0.0591, "step": 3155 }, { "epoch": 2.186352615171458, "grad_norm": 0.6638685464859009, "learning_rate": 7.818307905686547e-06, "loss": 0.0529, "step": 3156 }, { "epoch": 2.1870453758226533, "grad_norm": 0.682731568813324, "learning_rate": 7.817614424410542e-06, "loss": 0.0583, "step": 3157 }, { "epoch": 2.187738136473848, "grad_norm": 0.6498706340789795, "learning_rate": 7.816920943134537e-06, "loss": 0.0528, "step": 3158 }, { "epoch": 2.1884308971250435, "grad_norm": 0.6912710666656494, "learning_rate": 7.816227461858532e-06, "loss": 0.0558, "step": 3159 }, { "epoch": 2.1891236577762383, "grad_norm": 0.7095940709114075, "learning_rate": 7.815533980582525e-06, "loss": 0.0612, "step": 3160 }, { "epoch": 2.189816418427433, "grad_norm": 0.7736082673072815, "learning_rate": 7.81484049930652e-06, "loss": 0.0512, "step": 3161 }, { "epoch": 2.1905091790786284, "grad_norm": 0.6644687056541443, "learning_rate": 7.814147018030513e-06, "loss": 0.0533, "step": 3162 }, { "epoch": 2.1912019397298232, "grad_norm": 0.7022423148155212, "learning_rate": 7.813453536754508e-06, "loss": 0.0583, "step": 3163 }, { "epoch": 2.1918947003810185, "grad_norm": 0.7000126838684082, "learning_rate": 7.812760055478503e-06, "loss": 0.0467, "step": 3164 }, { "epoch": 2.1925874610322134, "grad_norm": 0.6936020851135254, "learning_rate": 7.812066574202498e-06, "loss": 0.0631, "step": 3165 }, { "epoch": 2.193280221683408, "grad_norm": 0.64175945520401, "learning_rate": 7.811373092926493e-06, "loss": 0.0412, "step": 3166 }, { "epoch": 2.1939729823346035, "grad_norm": 0.7573792338371277, "learning_rate": 7.810679611650486e-06, "loss": 0.0539, "step": 3167 }, { "epoch": 2.1946657429857983, "grad_norm": 0.6865546703338623, "learning_rate": 7.809986130374481e-06, "loss": 0.0783, "step": 3168 }, { "epoch": 2.1953585036369936, "grad_norm": 0.6688504219055176, "learning_rate": 7.809292649098476e-06, "loss": 0.0454, "step": 3169 }, { "epoch": 2.1960512642881884, "grad_norm": 0.5821261405944824, "learning_rate": 7.808599167822469e-06, "loss": 0.0464, "step": 3170 }, { "epoch": 2.1967440249393833, "grad_norm": 0.6367770433425903, "learning_rate": 7.807905686546464e-06, "loss": 0.0474, "step": 3171 }, { "epoch": 2.1974367855905785, "grad_norm": 0.6454951763153076, "learning_rate": 7.807212205270457e-06, "loss": 0.0518, "step": 3172 }, { "epoch": 2.1981295462417734, "grad_norm": 0.6782212257385254, "learning_rate": 7.806518723994452e-06, "loss": 0.0538, "step": 3173 }, { "epoch": 2.1988223068929686, "grad_norm": 0.7437390685081482, "learning_rate": 7.805825242718447e-06, "loss": 0.0503, "step": 3174 }, { "epoch": 2.1995150675441635, "grad_norm": 0.6945703029632568, "learning_rate": 7.805131761442442e-06, "loss": 0.0631, "step": 3175 }, { "epoch": 2.2002078281953583, "grad_norm": 0.654305636882782, "learning_rate": 7.804438280166437e-06, "loss": 0.0502, "step": 3176 }, { "epoch": 2.2009005888465536, "grad_norm": 0.6783000230789185, "learning_rate": 7.80374479889043e-06, "loss": 0.0527, "step": 3177 }, { "epoch": 2.2015933494977484, "grad_norm": 0.6859548687934875, "learning_rate": 7.803051317614425e-06, "loss": 0.0615, "step": 3178 }, { "epoch": 2.2022861101489437, "grad_norm": 0.6832144260406494, "learning_rate": 7.80235783633842e-06, "loss": 0.0493, "step": 3179 }, { "epoch": 2.2029788708001385, "grad_norm": 0.6961016654968262, "learning_rate": 7.801664355062413e-06, "loss": 0.0651, "step": 3180 }, { "epoch": 2.2036716314513334, "grad_norm": 0.5712021589279175, "learning_rate": 7.800970873786408e-06, "loss": 0.0347, "step": 3181 }, { "epoch": 2.2043643921025287, "grad_norm": 0.6744644045829773, "learning_rate": 7.800277392510403e-06, "loss": 0.0432, "step": 3182 }, { "epoch": 2.2050571527537235, "grad_norm": 0.6067906618118286, "learning_rate": 7.799583911234398e-06, "loss": 0.0512, "step": 3183 }, { "epoch": 2.2057499134049188, "grad_norm": 0.6815451383590698, "learning_rate": 7.798890429958393e-06, "loss": 0.0593, "step": 3184 }, { "epoch": 2.2064426740561136, "grad_norm": 0.7582570910453796, "learning_rate": 7.798196948682386e-06, "loss": 0.0436, "step": 3185 }, { "epoch": 2.2071354347073084, "grad_norm": 0.7396038770675659, "learning_rate": 7.797503467406381e-06, "loss": 0.0617, "step": 3186 }, { "epoch": 2.2078281953585037, "grad_norm": 0.700670599937439, "learning_rate": 7.796809986130375e-06, "loss": 0.0437, "step": 3187 }, { "epoch": 2.2085209560096986, "grad_norm": 0.7770943641662598, "learning_rate": 7.79611650485437e-06, "loss": 0.0491, "step": 3188 }, { "epoch": 2.209213716660894, "grad_norm": 0.6324613094329834, "learning_rate": 7.795423023578364e-06, "loss": 0.0495, "step": 3189 }, { "epoch": 2.2099064773120887, "grad_norm": 0.6112574934959412, "learning_rate": 7.794729542302358e-06, "loss": 0.0442, "step": 3190 }, { "epoch": 2.2105992379632835, "grad_norm": 0.7486513257026672, "learning_rate": 7.794036061026353e-06, "loss": 0.0557, "step": 3191 }, { "epoch": 2.211291998614479, "grad_norm": 0.7533448934555054, "learning_rate": 7.793342579750347e-06, "loss": 0.0555, "step": 3192 }, { "epoch": 2.2119847592656736, "grad_norm": 0.7420341968536377, "learning_rate": 7.792649098474342e-06, "loss": 0.0565, "step": 3193 }, { "epoch": 2.212677519916869, "grad_norm": 0.645088255405426, "learning_rate": 7.791955617198337e-06, "loss": 0.0545, "step": 3194 }, { "epoch": 2.2133702805680637, "grad_norm": 0.6115416884422302, "learning_rate": 7.79126213592233e-06, "loss": 0.0562, "step": 3195 }, { "epoch": 2.2140630412192586, "grad_norm": 0.6269606351852417, "learning_rate": 7.790568654646326e-06, "loss": 0.0527, "step": 3196 }, { "epoch": 2.214755801870454, "grad_norm": 0.6584285497665405, "learning_rate": 7.789875173370319e-06, "loss": 0.0485, "step": 3197 }, { "epoch": 2.2154485625216487, "grad_norm": 0.6327019333839417, "learning_rate": 7.789181692094314e-06, "loss": 0.0445, "step": 3198 }, { "epoch": 2.216141323172844, "grad_norm": 0.6777721643447876, "learning_rate": 7.788488210818309e-06, "loss": 0.0549, "step": 3199 }, { "epoch": 2.216834083824039, "grad_norm": 0.6763327121734619, "learning_rate": 7.787794729542304e-06, "loss": 0.059, "step": 3200 }, { "epoch": 2.2175268444752336, "grad_norm": 0.6816011071205139, "learning_rate": 7.787101248266299e-06, "loss": 0.0655, "step": 3201 }, { "epoch": 2.218219605126429, "grad_norm": 0.611720621585846, "learning_rate": 7.786407766990292e-06, "loss": 0.0466, "step": 3202 }, { "epoch": 2.2189123657776237, "grad_norm": 0.630315899848938, "learning_rate": 7.785714285714287e-06, "loss": 0.0566, "step": 3203 }, { "epoch": 2.219605126428819, "grad_norm": 0.7050280570983887, "learning_rate": 7.785020804438282e-06, "loss": 0.0589, "step": 3204 }, { "epoch": 2.220297887080014, "grad_norm": 0.7381419539451599, "learning_rate": 7.784327323162275e-06, "loss": 0.0646, "step": 3205 }, { "epoch": 2.2209906477312087, "grad_norm": 0.6707060933113098, "learning_rate": 7.78363384188627e-06, "loss": 0.056, "step": 3206 }, { "epoch": 2.221683408382404, "grad_norm": 0.5947423577308655, "learning_rate": 7.782940360610263e-06, "loss": 0.0509, "step": 3207 }, { "epoch": 2.222376169033599, "grad_norm": 0.6475197672843933, "learning_rate": 7.782246879334258e-06, "loss": 0.0503, "step": 3208 }, { "epoch": 2.223068929684794, "grad_norm": 0.6982625722885132, "learning_rate": 7.781553398058253e-06, "loss": 0.0631, "step": 3209 }, { "epoch": 2.223761690335989, "grad_norm": 0.7394846677780151, "learning_rate": 7.780859916782248e-06, "loss": 0.0618, "step": 3210 }, { "epoch": 2.2244544509871838, "grad_norm": 0.7193068265914917, "learning_rate": 7.780166435506243e-06, "loss": 0.0491, "step": 3211 }, { "epoch": 2.225147211638379, "grad_norm": 0.5738288760185242, "learning_rate": 7.779472954230236e-06, "loss": 0.0443, "step": 3212 }, { "epoch": 2.225839972289574, "grad_norm": 0.7070469856262207, "learning_rate": 7.778779472954231e-06, "loss": 0.0439, "step": 3213 }, { "epoch": 2.226532732940769, "grad_norm": 0.6029460430145264, "learning_rate": 7.778085991678226e-06, "loss": 0.0482, "step": 3214 }, { "epoch": 2.227225493591964, "grad_norm": 0.6252948045730591, "learning_rate": 7.777392510402219e-06, "loss": 0.0447, "step": 3215 }, { "epoch": 2.227918254243159, "grad_norm": 0.6637809872627258, "learning_rate": 7.776699029126214e-06, "loss": 0.0524, "step": 3216 }, { "epoch": 2.228611014894354, "grad_norm": 0.7449789643287659, "learning_rate": 7.776005547850209e-06, "loss": 0.0714, "step": 3217 }, { "epoch": 2.229303775545549, "grad_norm": 0.6193034648895264, "learning_rate": 7.775312066574204e-06, "loss": 0.0385, "step": 3218 }, { "epoch": 2.229996536196744, "grad_norm": 0.702811598777771, "learning_rate": 7.774618585298199e-06, "loss": 0.0538, "step": 3219 }, { "epoch": 2.230689296847939, "grad_norm": 0.746298611164093, "learning_rate": 7.773925104022192e-06, "loss": 0.0549, "step": 3220 }, { "epoch": 2.231382057499134, "grad_norm": 0.6173125505447388, "learning_rate": 7.773231622746187e-06, "loss": 0.0422, "step": 3221 }, { "epoch": 2.232074818150329, "grad_norm": 0.7027741074562073, "learning_rate": 7.77253814147018e-06, "loss": 0.0492, "step": 3222 }, { "epoch": 2.232767578801524, "grad_norm": 0.7909572720527649, "learning_rate": 7.771844660194175e-06, "loss": 0.0701, "step": 3223 }, { "epoch": 2.2334603394527193, "grad_norm": 0.6326325535774231, "learning_rate": 7.77115117891817e-06, "loss": 0.0649, "step": 3224 }, { "epoch": 2.234153100103914, "grad_norm": 0.6022499203681946, "learning_rate": 7.770457697642163e-06, "loss": 0.0497, "step": 3225 }, { "epoch": 2.234845860755109, "grad_norm": 0.7148244976997375, "learning_rate": 7.769764216366158e-06, "loss": 0.0548, "step": 3226 }, { "epoch": 2.235538621406304, "grad_norm": 0.7357353568077087, "learning_rate": 7.769070735090153e-06, "loss": 0.0527, "step": 3227 }, { "epoch": 2.236231382057499, "grad_norm": 0.6361616253852844, "learning_rate": 7.768377253814148e-06, "loss": 0.0506, "step": 3228 }, { "epoch": 2.2369241427086943, "grad_norm": 0.6656021475791931, "learning_rate": 7.767683772538143e-06, "loss": 0.0611, "step": 3229 }, { "epoch": 2.237616903359889, "grad_norm": 0.7853916883468628, "learning_rate": 7.766990291262136e-06, "loss": 0.05, "step": 3230 }, { "epoch": 2.238309664011084, "grad_norm": 0.7821096181869507, "learning_rate": 7.766296809986131e-06, "loss": 0.0473, "step": 3231 }, { "epoch": 2.2390024246622793, "grad_norm": 0.7403864860534668, "learning_rate": 7.765603328710125e-06, "loss": 0.0601, "step": 3232 }, { "epoch": 2.239695185313474, "grad_norm": 0.8113395571708679, "learning_rate": 7.76490984743412e-06, "loss": 0.0598, "step": 3233 }, { "epoch": 2.2403879459646694, "grad_norm": 0.6732940077781677, "learning_rate": 7.764216366158114e-06, "loss": 0.0508, "step": 3234 }, { "epoch": 2.2410807066158642, "grad_norm": 0.735000729560852, "learning_rate": 7.76352288488211e-06, "loss": 0.069, "step": 3235 }, { "epoch": 2.241773467267059, "grad_norm": 0.6428692936897278, "learning_rate": 7.762829403606104e-06, "loss": 0.0511, "step": 3236 }, { "epoch": 2.2424662279182543, "grad_norm": 0.7943806648254395, "learning_rate": 7.762135922330097e-06, "loss": 0.0669, "step": 3237 }, { "epoch": 2.243158988569449, "grad_norm": 0.6358294486999512, "learning_rate": 7.761442441054092e-06, "loss": 0.0482, "step": 3238 }, { "epoch": 2.2438517492206445, "grad_norm": 0.6320732831954956, "learning_rate": 7.760748959778087e-06, "loss": 0.0478, "step": 3239 }, { "epoch": 2.2445445098718393, "grad_norm": 0.8697232604026794, "learning_rate": 7.76005547850208e-06, "loss": 0.0598, "step": 3240 }, { "epoch": 2.245237270523034, "grad_norm": 0.5765998959541321, "learning_rate": 7.759361997226076e-06, "loss": 0.0458, "step": 3241 }, { "epoch": 2.2459300311742294, "grad_norm": 0.6806704998016357, "learning_rate": 7.75866851595007e-06, "loss": 0.0681, "step": 3242 }, { "epoch": 2.2466227918254242, "grad_norm": 0.5612189173698425, "learning_rate": 7.757975034674065e-06, "loss": 0.0433, "step": 3243 }, { "epoch": 2.2473155524766195, "grad_norm": 0.6763518452644348, "learning_rate": 7.757281553398059e-06, "loss": 0.0591, "step": 3244 }, { "epoch": 2.2480083131278144, "grad_norm": 0.7218267917633057, "learning_rate": 7.756588072122054e-06, "loss": 0.0678, "step": 3245 }, { "epoch": 2.248701073779009, "grad_norm": 0.5798293352127075, "learning_rate": 7.755894590846048e-06, "loss": 0.0378, "step": 3246 }, { "epoch": 2.2493938344302045, "grad_norm": 0.6329561471939087, "learning_rate": 7.755201109570042e-06, "loss": 0.0493, "step": 3247 }, { "epoch": 2.2500865950813993, "grad_norm": 0.8438361287117004, "learning_rate": 7.754507628294037e-06, "loss": 0.0567, "step": 3248 }, { "epoch": 2.2507793557325946, "grad_norm": 0.6507179737091064, "learning_rate": 7.753814147018032e-06, "loss": 0.0466, "step": 3249 }, { "epoch": 2.2514721163837894, "grad_norm": 0.6499149799346924, "learning_rate": 7.753120665742025e-06, "loss": 0.0614, "step": 3250 }, { "epoch": 2.2521648770349842, "grad_norm": 0.7082823514938354, "learning_rate": 7.75242718446602e-06, "loss": 0.0567, "step": 3251 }, { "epoch": 2.2528576376861795, "grad_norm": 0.8208842873573303, "learning_rate": 7.751733703190015e-06, "loss": 0.0416, "step": 3252 }, { "epoch": 2.2535503983373744, "grad_norm": 0.7298017144203186, "learning_rate": 7.75104022191401e-06, "loss": 0.0558, "step": 3253 }, { "epoch": 2.2542431589885696, "grad_norm": 0.6573927402496338, "learning_rate": 7.750346740638005e-06, "loss": 0.0467, "step": 3254 }, { "epoch": 2.2549359196397645, "grad_norm": 0.7816340923309326, "learning_rate": 7.749653259361998e-06, "loss": 0.0587, "step": 3255 }, { "epoch": 2.2556286802909593, "grad_norm": 0.9797429442405701, "learning_rate": 7.748959778085993e-06, "loss": 0.0526, "step": 3256 }, { "epoch": 2.2563214409421546, "grad_norm": 0.8206029534339905, "learning_rate": 7.748266296809986e-06, "loss": 0.0782, "step": 3257 }, { "epoch": 2.2570142015933494, "grad_norm": 0.6237604022026062, "learning_rate": 7.747572815533981e-06, "loss": 0.0525, "step": 3258 }, { "epoch": 2.2577069622445447, "grad_norm": 0.5808584094047546, "learning_rate": 7.746879334257976e-06, "loss": 0.051, "step": 3259 }, { "epoch": 2.2583997228957395, "grad_norm": 0.6812832355499268, "learning_rate": 7.74618585298197e-06, "loss": 0.05, "step": 3260 }, { "epoch": 2.2590924835469344, "grad_norm": 0.6276348829269409, "learning_rate": 7.745492371705966e-06, "loss": 0.0453, "step": 3261 }, { "epoch": 2.2597852441981297, "grad_norm": 0.6594873070716858, "learning_rate": 7.744798890429959e-06, "loss": 0.0522, "step": 3262 }, { "epoch": 2.2604780048493245, "grad_norm": 0.6078917980194092, "learning_rate": 7.744105409153954e-06, "loss": 0.0524, "step": 3263 }, { "epoch": 2.2611707655005198, "grad_norm": 0.7057173252105713, "learning_rate": 7.743411927877947e-06, "loss": 0.0725, "step": 3264 }, { "epoch": 2.2618635261517146, "grad_norm": 0.8082737922668457, "learning_rate": 7.742718446601942e-06, "loss": 0.0569, "step": 3265 }, { "epoch": 2.2625562868029094, "grad_norm": 0.7110036015510559, "learning_rate": 7.742024965325937e-06, "loss": 0.0537, "step": 3266 }, { "epoch": 2.2632490474541047, "grad_norm": 0.6635528206825256, "learning_rate": 7.74133148404993e-06, "loss": 0.0534, "step": 3267 }, { "epoch": 2.2639418081052995, "grad_norm": 0.7297467589378357, "learning_rate": 7.740638002773925e-06, "loss": 0.0476, "step": 3268 }, { "epoch": 2.2646345687564944, "grad_norm": 0.629737377166748, "learning_rate": 7.73994452149792e-06, "loss": 0.0504, "step": 3269 }, { "epoch": 2.2653273294076897, "grad_norm": 0.7582229971885681, "learning_rate": 7.739251040221915e-06, "loss": 0.0601, "step": 3270 }, { "epoch": 2.2660200900588845, "grad_norm": 0.8479446172714233, "learning_rate": 7.73855755894591e-06, "loss": 0.0457, "step": 3271 }, { "epoch": 2.2667128507100798, "grad_norm": 0.7234289050102234, "learning_rate": 7.737864077669903e-06, "loss": 0.0508, "step": 3272 }, { "epoch": 2.2674056113612746, "grad_norm": 0.653535008430481, "learning_rate": 7.737170596393898e-06, "loss": 0.061, "step": 3273 }, { "epoch": 2.26809837201247, "grad_norm": 0.6727892160415649, "learning_rate": 7.736477115117891e-06, "loss": 0.043, "step": 3274 }, { "epoch": 2.2687911326636647, "grad_norm": 0.7123434543609619, "learning_rate": 7.735783633841886e-06, "loss": 0.0571, "step": 3275 }, { "epoch": 2.2694838933148596, "grad_norm": 0.6722378134727478, "learning_rate": 7.735090152565881e-06, "loss": 0.0449, "step": 3276 }, { "epoch": 2.270176653966055, "grad_norm": 0.8674989342689514, "learning_rate": 7.734396671289876e-06, "loss": 0.0588, "step": 3277 }, { "epoch": 2.2708694146172497, "grad_norm": 0.6619544625282288, "learning_rate": 7.733703190013871e-06, "loss": 0.0478, "step": 3278 }, { "epoch": 2.2715621752684445, "grad_norm": 0.7146316766738892, "learning_rate": 7.733009708737864e-06, "loss": 0.0578, "step": 3279 }, { "epoch": 2.27225493591964, "grad_norm": 0.6067398190498352, "learning_rate": 7.73231622746186e-06, "loss": 0.0427, "step": 3280 }, { "epoch": 2.2729476965708346, "grad_norm": 0.7777449488639832, "learning_rate": 7.731622746185854e-06, "loss": 0.0513, "step": 3281 }, { "epoch": 2.27364045722203, "grad_norm": 0.6652003526687622, "learning_rate": 7.730929264909847e-06, "loss": 0.0685, "step": 3282 }, { "epoch": 2.2743332178732247, "grad_norm": 0.7848238348960876, "learning_rate": 7.730235783633842e-06, "loss": 0.0771, "step": 3283 }, { "epoch": 2.27502597852442, "grad_norm": 0.6617704033851624, "learning_rate": 7.729542302357836e-06, "loss": 0.0468, "step": 3284 }, { "epoch": 2.275718739175615, "grad_norm": 0.712853729724884, "learning_rate": 7.72884882108183e-06, "loss": 0.0644, "step": 3285 }, { "epoch": 2.2764114998268097, "grad_norm": 0.6999899744987488, "learning_rate": 7.728155339805825e-06, "loss": 0.0533, "step": 3286 }, { "epoch": 2.277104260478005, "grad_norm": 0.790968120098114, "learning_rate": 7.72746185852982e-06, "loss": 0.0718, "step": 3287 }, { "epoch": 2.2777970211292, "grad_norm": 0.6867518424987793, "learning_rate": 7.726768377253815e-06, "loss": 0.0668, "step": 3288 }, { "epoch": 2.2784897817803946, "grad_norm": 0.6289021968841553, "learning_rate": 7.726074895977809e-06, "loss": 0.0573, "step": 3289 }, { "epoch": 2.27918254243159, "grad_norm": 0.7025508880615234, "learning_rate": 7.725381414701804e-06, "loss": 0.0459, "step": 3290 }, { "epoch": 2.2798753030827847, "grad_norm": 0.6779745817184448, "learning_rate": 7.724687933425798e-06, "loss": 0.0444, "step": 3291 }, { "epoch": 2.28056806373398, "grad_norm": 0.6166813969612122, "learning_rate": 7.723994452149792e-06, "loss": 0.0617, "step": 3292 }, { "epoch": 2.281260824385175, "grad_norm": 0.766677975654602, "learning_rate": 7.723300970873787e-06, "loss": 0.0506, "step": 3293 }, { "epoch": 2.28195358503637, "grad_norm": 0.9004775285720825, "learning_rate": 7.722607489597782e-06, "loss": 0.063, "step": 3294 }, { "epoch": 2.282646345687565, "grad_norm": 0.7935314774513245, "learning_rate": 7.721914008321777e-06, "loss": 0.0546, "step": 3295 }, { "epoch": 2.28333910633876, "grad_norm": 0.7481679916381836, "learning_rate": 7.721220527045771e-06, "loss": 0.0589, "step": 3296 }, { "epoch": 2.284031866989955, "grad_norm": 0.7239437103271484, "learning_rate": 7.720527045769765e-06, "loss": 0.0427, "step": 3297 }, { "epoch": 2.28472462764115, "grad_norm": 0.7293107509613037, "learning_rate": 7.71983356449376e-06, "loss": 0.0555, "step": 3298 }, { "epoch": 2.2854173882923448, "grad_norm": 0.6999469995498657, "learning_rate": 7.719140083217753e-06, "loss": 0.0713, "step": 3299 }, { "epoch": 2.28611014894354, "grad_norm": 0.6839814186096191, "learning_rate": 7.718446601941748e-06, "loss": 0.049, "step": 3300 }, { "epoch": 2.286802909594735, "grad_norm": 0.6535493731498718, "learning_rate": 7.717753120665743e-06, "loss": 0.0533, "step": 3301 }, { "epoch": 2.28749567024593, "grad_norm": 0.6733627319335938, "learning_rate": 7.717059639389736e-06, "loss": 0.0628, "step": 3302 }, { "epoch": 2.288188430897125, "grad_norm": 0.7033507823944092, "learning_rate": 7.716366158113731e-06, "loss": 0.0473, "step": 3303 }, { "epoch": 2.2888811915483203, "grad_norm": 0.8091992139816284, "learning_rate": 7.715672676837726e-06, "loss": 0.0715, "step": 3304 }, { "epoch": 2.289573952199515, "grad_norm": 0.6329368352890015, "learning_rate": 7.71497919556172e-06, "loss": 0.0407, "step": 3305 }, { "epoch": 2.29026671285071, "grad_norm": 0.6877524852752686, "learning_rate": 7.714285714285716e-06, "loss": 0.0586, "step": 3306 }, { "epoch": 2.290959473501905, "grad_norm": 0.7619491815567017, "learning_rate": 7.713592233009709e-06, "loss": 0.0565, "step": 3307 }, { "epoch": 2.2916522341531, "grad_norm": 0.6912320256233215, "learning_rate": 7.712898751733704e-06, "loss": 0.0632, "step": 3308 }, { "epoch": 2.292344994804295, "grad_norm": 0.7955142259597778, "learning_rate": 7.712205270457697e-06, "loss": 0.0456, "step": 3309 }, { "epoch": 2.29303775545549, "grad_norm": 0.8076238036155701, "learning_rate": 7.711511789181692e-06, "loss": 0.066, "step": 3310 }, { "epoch": 2.293730516106685, "grad_norm": 0.6438044905662537, "learning_rate": 7.710818307905687e-06, "loss": 0.0508, "step": 3311 }, { "epoch": 2.2944232767578803, "grad_norm": 0.6734865307807922, "learning_rate": 7.710124826629682e-06, "loss": 0.0561, "step": 3312 }, { "epoch": 2.295116037409075, "grad_norm": 0.6374542117118835, "learning_rate": 7.709431345353677e-06, "loss": 0.0511, "step": 3313 }, { "epoch": 2.2958087980602704, "grad_norm": 0.7424620985984802, "learning_rate": 7.70873786407767e-06, "loss": 0.0665, "step": 3314 }, { "epoch": 2.296501558711465, "grad_norm": 0.6437535881996155, "learning_rate": 7.708044382801665e-06, "loss": 0.0526, "step": 3315 }, { "epoch": 2.29719431936266, "grad_norm": 0.674333930015564, "learning_rate": 7.70735090152566e-06, "loss": 0.061, "step": 3316 }, { "epoch": 2.2978870800138553, "grad_norm": 0.759538471698761, "learning_rate": 7.706657420249653e-06, "loss": 0.0749, "step": 3317 }, { "epoch": 2.29857984066505, "grad_norm": 0.7620114088058472, "learning_rate": 7.705963938973648e-06, "loss": 0.072, "step": 3318 }, { "epoch": 2.299272601316245, "grad_norm": 0.7666256427764893, "learning_rate": 7.705270457697643e-06, "loss": 0.0544, "step": 3319 }, { "epoch": 2.2999653619674403, "grad_norm": 0.703120768070221, "learning_rate": 7.704576976421638e-06, "loss": 0.05, "step": 3320 }, { "epoch": 2.300658122618635, "grad_norm": 0.6174482107162476, "learning_rate": 7.703883495145631e-06, "loss": 0.0411, "step": 3321 }, { "epoch": 2.3013508832698304, "grad_norm": 0.7337712049484253, "learning_rate": 7.703190013869626e-06, "loss": 0.0749, "step": 3322 }, { "epoch": 2.3020436439210252, "grad_norm": 0.754616379737854, "learning_rate": 7.702496532593621e-06, "loss": 0.0542, "step": 3323 }, { "epoch": 2.3027364045722205, "grad_norm": 0.5999162793159485, "learning_rate": 7.701803051317614e-06, "loss": 0.0453, "step": 3324 }, { "epoch": 2.3034291652234153, "grad_norm": 0.5953550338745117, "learning_rate": 7.70110957004161e-06, "loss": 0.0388, "step": 3325 }, { "epoch": 2.30412192587461, "grad_norm": 0.6107983589172363, "learning_rate": 7.700416088765604e-06, "loss": 0.0504, "step": 3326 }, { "epoch": 2.3048146865258055, "grad_norm": 0.625367283821106, "learning_rate": 7.699722607489597e-06, "loss": 0.0414, "step": 3327 }, { "epoch": 2.3055074471770003, "grad_norm": 0.6652399301528931, "learning_rate": 7.699029126213592e-06, "loss": 0.0479, "step": 3328 }, { "epoch": 2.306200207828195, "grad_norm": 0.6237362623214722, "learning_rate": 7.698335644937587e-06, "loss": 0.0556, "step": 3329 }, { "epoch": 2.3068929684793904, "grad_norm": 0.707344114780426, "learning_rate": 7.697642163661582e-06, "loss": 0.0503, "step": 3330 }, { "epoch": 2.3075857291305852, "grad_norm": 0.5936099290847778, "learning_rate": 7.696948682385577e-06, "loss": 0.0484, "step": 3331 }, { "epoch": 2.3082784897817805, "grad_norm": 0.6967319250106812, "learning_rate": 7.69625520110957e-06, "loss": 0.0482, "step": 3332 }, { "epoch": 2.3089712504329754, "grad_norm": 0.6179273128509521, "learning_rate": 7.695561719833565e-06, "loss": 0.0512, "step": 3333 }, { "epoch": 2.3096640110841706, "grad_norm": 0.6576906442642212, "learning_rate": 7.694868238557559e-06, "loss": 0.0524, "step": 3334 }, { "epoch": 2.3103567717353655, "grad_norm": 0.6755362749099731, "learning_rate": 7.694174757281554e-06, "loss": 0.059, "step": 3335 }, { "epoch": 2.3110495323865603, "grad_norm": 0.6898190379142761, "learning_rate": 7.693481276005548e-06, "loss": 0.0511, "step": 3336 }, { "epoch": 2.3117422930377556, "grad_norm": 0.7592430114746094, "learning_rate": 7.692787794729543e-06, "loss": 0.0711, "step": 3337 }, { "epoch": 2.3124350536889504, "grad_norm": 0.7317067384719849, "learning_rate": 7.692094313453538e-06, "loss": 0.0535, "step": 3338 }, { "epoch": 2.3131278143401452, "grad_norm": 0.6594251990318298, "learning_rate": 7.691400832177532e-06, "loss": 0.0502, "step": 3339 }, { "epoch": 2.3138205749913405, "grad_norm": 0.6752980351448059, "learning_rate": 7.690707350901526e-06, "loss": 0.0561, "step": 3340 }, { "epoch": 2.3145133356425354, "grad_norm": 0.7381166815757751, "learning_rate": 7.690013869625521e-06, "loss": 0.059, "step": 3341 }, { "epoch": 2.3152060962937306, "grad_norm": 0.8149734139442444, "learning_rate": 7.689320388349515e-06, "loss": 0.07, "step": 3342 }, { "epoch": 2.3158988569449255, "grad_norm": 0.6074904203414917, "learning_rate": 7.68862690707351e-06, "loss": 0.0454, "step": 3343 }, { "epoch": 2.3165916175961208, "grad_norm": 0.7600086331367493, "learning_rate": 7.687933425797503e-06, "loss": 0.0617, "step": 3344 }, { "epoch": 2.3172843782473156, "grad_norm": 0.6978055238723755, "learning_rate": 7.687239944521498e-06, "loss": 0.0532, "step": 3345 }, { "epoch": 2.3179771388985104, "grad_norm": 0.5998455882072449, "learning_rate": 7.686546463245493e-06, "loss": 0.0547, "step": 3346 }, { "epoch": 2.3186698995497057, "grad_norm": 0.7415463328361511, "learning_rate": 7.685852981969488e-06, "loss": 0.0546, "step": 3347 }, { "epoch": 2.3193626602009005, "grad_norm": 0.6985805034637451, "learning_rate": 7.685159500693483e-06, "loss": 0.0598, "step": 3348 }, { "epoch": 2.3200554208520954, "grad_norm": 0.7012605667114258, "learning_rate": 7.684466019417476e-06, "loss": 0.0643, "step": 3349 }, { "epoch": 2.3207481815032907, "grad_norm": 0.7899414896965027, "learning_rate": 7.68377253814147e-06, "loss": 0.0494, "step": 3350 }, { "epoch": 2.3214409421544855, "grad_norm": 0.7159631848335266, "learning_rate": 7.683079056865466e-06, "loss": 0.0545, "step": 3351 }, { "epoch": 2.3221337028056808, "grad_norm": 0.6470344066619873, "learning_rate": 7.682385575589459e-06, "loss": 0.0501, "step": 3352 }, { "epoch": 2.3228264634568756, "grad_norm": 0.6960127353668213, "learning_rate": 7.681692094313454e-06, "loss": 0.0595, "step": 3353 }, { "epoch": 2.323519224108071, "grad_norm": 0.6139276027679443, "learning_rate": 7.680998613037449e-06, "loss": 0.0559, "step": 3354 }, { "epoch": 2.3242119847592657, "grad_norm": 0.6615197062492371, "learning_rate": 7.680305131761444e-06, "loss": 0.0578, "step": 3355 }, { "epoch": 2.3249047454104605, "grad_norm": 0.6286178231239319, "learning_rate": 7.679611650485439e-06, "loss": 0.0565, "step": 3356 }, { "epoch": 2.325597506061656, "grad_norm": 0.7141321301460266, "learning_rate": 7.678918169209432e-06, "loss": 0.044, "step": 3357 }, { "epoch": 2.3262902667128507, "grad_norm": 0.7097179889678955, "learning_rate": 7.678224687933427e-06, "loss": 0.0537, "step": 3358 }, { "epoch": 2.3269830273640455, "grad_norm": 0.7265419363975525, "learning_rate": 7.67753120665742e-06, "loss": 0.0714, "step": 3359 }, { "epoch": 2.3276757880152408, "grad_norm": 0.5918153524398804, "learning_rate": 7.676837725381415e-06, "loss": 0.0411, "step": 3360 }, { "epoch": 2.3283685486664356, "grad_norm": 0.7092829942703247, "learning_rate": 7.67614424410541e-06, "loss": 0.0545, "step": 3361 }, { "epoch": 2.329061309317631, "grad_norm": 0.6691579222679138, "learning_rate": 7.675450762829403e-06, "loss": 0.0616, "step": 3362 }, { "epoch": 2.3297540699688257, "grad_norm": 0.7718387842178345, "learning_rate": 7.674757281553398e-06, "loss": 0.0662, "step": 3363 }, { "epoch": 2.330446830620021, "grad_norm": 0.7115030884742737, "learning_rate": 7.674063800277393e-06, "loss": 0.0429, "step": 3364 }, { "epoch": 2.331139591271216, "grad_norm": 0.6774365901947021, "learning_rate": 7.673370319001388e-06, "loss": 0.0498, "step": 3365 }, { "epoch": 2.3318323519224107, "grad_norm": 0.7079840302467346, "learning_rate": 7.672676837725383e-06, "loss": 0.0617, "step": 3366 }, { "epoch": 2.332525112573606, "grad_norm": 0.5688597559928894, "learning_rate": 7.671983356449376e-06, "loss": 0.0462, "step": 3367 }, { "epoch": 2.333217873224801, "grad_norm": 0.7698050737380981, "learning_rate": 7.671289875173371e-06, "loss": 0.0598, "step": 3368 }, { "epoch": 2.3339106338759956, "grad_norm": 0.8596594333648682, "learning_rate": 7.670596393897364e-06, "loss": 0.054, "step": 3369 }, { "epoch": 2.334603394527191, "grad_norm": 0.6817486882209778, "learning_rate": 7.66990291262136e-06, "loss": 0.0593, "step": 3370 }, { "epoch": 2.3352961551783857, "grad_norm": 0.6705496907234192, "learning_rate": 7.669209431345354e-06, "loss": 0.0507, "step": 3371 }, { "epoch": 2.335988915829581, "grad_norm": 0.665134608745575, "learning_rate": 7.668515950069349e-06, "loss": 0.0535, "step": 3372 }, { "epoch": 2.336681676480776, "grad_norm": 0.5743928551673889, "learning_rate": 7.667822468793344e-06, "loss": 0.0388, "step": 3373 }, { "epoch": 2.337374437131971, "grad_norm": 0.6930652260780334, "learning_rate": 7.667128987517337e-06, "loss": 0.062, "step": 3374 }, { "epoch": 2.338067197783166, "grad_norm": 0.6121317744255066, "learning_rate": 7.666435506241332e-06, "loss": 0.0415, "step": 3375 }, { "epoch": 2.338759958434361, "grad_norm": 0.7009512782096863, "learning_rate": 7.665742024965327e-06, "loss": 0.0519, "step": 3376 }, { "epoch": 2.339452719085556, "grad_norm": 0.7098727822303772, "learning_rate": 7.66504854368932e-06, "loss": 0.0469, "step": 3377 }, { "epoch": 2.340145479736751, "grad_norm": 0.6412307620048523, "learning_rate": 7.664355062413315e-06, "loss": 0.051, "step": 3378 }, { "epoch": 2.3408382403879457, "grad_norm": 0.8032299876213074, "learning_rate": 7.663661581137309e-06, "loss": 0.0665, "step": 3379 }, { "epoch": 2.341531001039141, "grad_norm": 0.7739721536636353, "learning_rate": 7.662968099861303e-06, "loss": 0.0553, "step": 3380 }, { "epoch": 2.342223761690336, "grad_norm": 0.5881069302558899, "learning_rate": 7.662274618585298e-06, "loss": 0.0424, "step": 3381 }, { "epoch": 2.342916522341531, "grad_norm": 0.7282366156578064, "learning_rate": 7.661581137309293e-06, "loss": 0.0489, "step": 3382 }, { "epoch": 2.343609282992726, "grad_norm": 0.6442456245422363, "learning_rate": 7.660887656033288e-06, "loss": 0.0536, "step": 3383 }, { "epoch": 2.3443020436439213, "grad_norm": 0.6761475801467896, "learning_rate": 7.660194174757282e-06, "loss": 0.0523, "step": 3384 }, { "epoch": 2.344994804295116, "grad_norm": 0.6950258016586304, "learning_rate": 7.659500693481276e-06, "loss": 0.0572, "step": 3385 }, { "epoch": 2.345687564946311, "grad_norm": 0.6278981566429138, "learning_rate": 7.658807212205271e-06, "loss": 0.0509, "step": 3386 }, { "epoch": 2.346380325597506, "grad_norm": 0.6928952932357788, "learning_rate": 7.658113730929265e-06, "loss": 0.0524, "step": 3387 }, { "epoch": 2.347073086248701, "grad_norm": 0.7491379380226135, "learning_rate": 7.65742024965326e-06, "loss": 0.0461, "step": 3388 }, { "epoch": 2.347765846899896, "grad_norm": 0.7919186353683472, "learning_rate": 7.656726768377255e-06, "loss": 0.061, "step": 3389 }, { "epoch": 2.348458607551091, "grad_norm": 0.7522119283676147, "learning_rate": 7.65603328710125e-06, "loss": 0.0685, "step": 3390 }, { "epoch": 2.349151368202286, "grad_norm": 0.6858955025672913, "learning_rate": 7.655339805825244e-06, "loss": 0.0695, "step": 3391 }, { "epoch": 2.3498441288534813, "grad_norm": 0.7146169543266296, "learning_rate": 7.654646324549238e-06, "loss": 0.0649, "step": 3392 }, { "epoch": 2.350536889504676, "grad_norm": 0.7618662714958191, "learning_rate": 7.653952843273233e-06, "loss": 0.0639, "step": 3393 }, { "epoch": 2.3512296501558714, "grad_norm": 0.7013067007064819, "learning_rate": 7.653259361997226e-06, "loss": 0.051, "step": 3394 }, { "epoch": 2.351922410807066, "grad_norm": 0.7284305095672607, "learning_rate": 7.65256588072122e-06, "loss": 0.0575, "step": 3395 }, { "epoch": 2.352615171458261, "grad_norm": 0.951767086982727, "learning_rate": 7.651872399445216e-06, "loss": 0.0742, "step": 3396 }, { "epoch": 2.3533079321094563, "grad_norm": 0.6954596638679504, "learning_rate": 7.65117891816921e-06, "loss": 0.064, "step": 3397 }, { "epoch": 2.354000692760651, "grad_norm": 0.7585980892181396, "learning_rate": 7.650485436893204e-06, "loss": 0.0561, "step": 3398 }, { "epoch": 2.354693453411846, "grad_norm": 0.5955365300178528, "learning_rate": 7.649791955617199e-06, "loss": 0.05, "step": 3399 }, { "epoch": 2.3553862140630413, "grad_norm": 0.7831491827964783, "learning_rate": 7.649098474341194e-06, "loss": 0.0649, "step": 3400 }, { "epoch": 2.356078974714236, "grad_norm": 0.7788838744163513, "learning_rate": 7.648404993065189e-06, "loss": 0.0615, "step": 3401 }, { "epoch": 2.3567717353654314, "grad_norm": 0.5785791277885437, "learning_rate": 7.647711511789182e-06, "loss": 0.0473, "step": 3402 }, { "epoch": 2.357464496016626, "grad_norm": 0.6168627738952637, "learning_rate": 7.647018030513177e-06, "loss": 0.0543, "step": 3403 }, { "epoch": 2.3581572566678215, "grad_norm": 0.7591379284858704, "learning_rate": 7.64632454923717e-06, "loss": 0.0655, "step": 3404 }, { "epoch": 2.3588500173190163, "grad_norm": 0.601206362247467, "learning_rate": 7.645631067961165e-06, "loss": 0.0425, "step": 3405 }, { "epoch": 2.359542777970211, "grad_norm": 0.6227657198905945, "learning_rate": 7.64493758668516e-06, "loss": 0.0481, "step": 3406 }, { "epoch": 2.3602355386214064, "grad_norm": 0.6924575567245483, "learning_rate": 7.644244105409155e-06, "loss": 0.0645, "step": 3407 }, { "epoch": 2.3609282992726013, "grad_norm": 0.6208140254020691, "learning_rate": 7.64355062413315e-06, "loss": 0.0464, "step": 3408 }, { "epoch": 2.361621059923796, "grad_norm": 0.6188228130340576, "learning_rate": 7.642857142857143e-06, "loss": 0.0444, "step": 3409 }, { "epoch": 2.3623138205749914, "grad_norm": 0.6708952188491821, "learning_rate": 7.642163661581138e-06, "loss": 0.0551, "step": 3410 }, { "epoch": 2.3630065812261862, "grad_norm": 0.6148442029953003, "learning_rate": 7.641470180305133e-06, "loss": 0.0457, "step": 3411 }, { "epoch": 2.3636993418773815, "grad_norm": 0.6684728860855103, "learning_rate": 7.640776699029126e-06, "loss": 0.0536, "step": 3412 }, { "epoch": 2.3643921025285763, "grad_norm": 0.729336678981781, "learning_rate": 7.640083217753121e-06, "loss": 0.047, "step": 3413 }, { "epoch": 2.3650848631797716, "grad_norm": 0.5910308957099915, "learning_rate": 7.639389736477116e-06, "loss": 0.0452, "step": 3414 }, { "epoch": 2.3657776238309665, "grad_norm": 0.621016800403595, "learning_rate": 7.638696255201111e-06, "loss": 0.0474, "step": 3415 }, { "epoch": 2.3664703844821613, "grad_norm": 0.7773977518081665, "learning_rate": 7.638002773925106e-06, "loss": 0.0418, "step": 3416 }, { "epoch": 2.3671631451333566, "grad_norm": 0.8472411036491394, "learning_rate": 7.637309292649099e-06, "loss": 0.081, "step": 3417 }, { "epoch": 2.3678559057845514, "grad_norm": 0.6587639451026917, "learning_rate": 7.636615811373094e-06, "loss": 0.0467, "step": 3418 }, { "epoch": 2.3685486664357462, "grad_norm": 0.811945378780365, "learning_rate": 7.635922330097087e-06, "loss": 0.0623, "step": 3419 }, { "epoch": 2.3692414270869415, "grad_norm": 0.6511952877044678, "learning_rate": 7.635228848821082e-06, "loss": 0.0497, "step": 3420 }, { "epoch": 2.3699341877381364, "grad_norm": 0.6955153346061707, "learning_rate": 7.634535367545077e-06, "loss": 0.0551, "step": 3421 }, { "epoch": 2.3706269483893316, "grad_norm": 0.7224912643432617, "learning_rate": 7.63384188626907e-06, "loss": 0.0559, "step": 3422 }, { "epoch": 2.3713197090405265, "grad_norm": 0.6983798742294312, "learning_rate": 7.633148404993065e-06, "loss": 0.049, "step": 3423 }, { "epoch": 2.3720124696917217, "grad_norm": 0.696856677532196, "learning_rate": 7.63245492371706e-06, "loss": 0.0517, "step": 3424 }, { "epoch": 2.3727052303429166, "grad_norm": 0.7483749389648438, "learning_rate": 7.631761442441055e-06, "loss": 0.0525, "step": 3425 }, { "epoch": 2.3733979909941114, "grad_norm": 0.8678938150405884, "learning_rate": 7.63106796116505e-06, "loss": 0.0616, "step": 3426 }, { "epoch": 2.3740907516453067, "grad_norm": 0.7085834741592407, "learning_rate": 7.630374479889043e-06, "loss": 0.0672, "step": 3427 }, { "epoch": 2.3747835122965015, "grad_norm": 0.6799753904342651, "learning_rate": 7.629680998613038e-06, "loss": 0.0479, "step": 3428 }, { "epoch": 2.3754762729476964, "grad_norm": 0.654399037361145, "learning_rate": 7.628987517337032e-06, "loss": 0.0506, "step": 3429 }, { "epoch": 2.3761690335988916, "grad_norm": 0.7557405829429626, "learning_rate": 7.628294036061027e-06, "loss": 0.0622, "step": 3430 }, { "epoch": 2.3768617942500865, "grad_norm": 0.6945838928222656, "learning_rate": 7.627600554785022e-06, "loss": 0.0597, "step": 3431 }, { "epoch": 2.3775545549012818, "grad_norm": 0.7190044522285461, "learning_rate": 7.6269070735090155e-06, "loss": 0.0458, "step": 3432 }, { "epoch": 2.3782473155524766, "grad_norm": 0.7875450849533081, "learning_rate": 7.62621359223301e-06, "loss": 0.0587, "step": 3433 }, { "epoch": 2.378940076203672, "grad_norm": 0.7193741798400879, "learning_rate": 7.6255201109570045e-06, "loss": 0.0612, "step": 3434 }, { "epoch": 2.3796328368548667, "grad_norm": 0.5874949097633362, "learning_rate": 7.624826629680999e-06, "loss": 0.047, "step": 3435 }, { "epoch": 2.3803255975060615, "grad_norm": 0.6685798764228821, "learning_rate": 7.624133148404994e-06, "loss": 0.0498, "step": 3436 }, { "epoch": 2.381018358157257, "grad_norm": 0.6457735896110535, "learning_rate": 7.623439667128988e-06, "loss": 0.0602, "step": 3437 }, { "epoch": 2.3817111188084517, "grad_norm": 0.7490965127944946, "learning_rate": 7.6227461858529825e-06, "loss": 0.061, "step": 3438 }, { "epoch": 2.3824038794596465, "grad_norm": 0.9154457449913025, "learning_rate": 7.622052704576977e-06, "loss": 0.0629, "step": 3439 }, { "epoch": 2.3830966401108418, "grad_norm": 0.6912528276443481, "learning_rate": 7.6213592233009715e-06, "loss": 0.0523, "step": 3440 }, { "epoch": 2.3837894007620366, "grad_norm": 0.7128589153289795, "learning_rate": 7.6206657420249665e-06, "loss": 0.0487, "step": 3441 }, { "epoch": 2.384482161413232, "grad_norm": 0.7874253392219543, "learning_rate": 7.61997226074896e-06, "loss": 0.0524, "step": 3442 }, { "epoch": 2.3851749220644267, "grad_norm": 0.7684004306793213, "learning_rate": 7.619278779472955e-06, "loss": 0.0701, "step": 3443 }, { "epoch": 2.385867682715622, "grad_norm": 0.8227635622024536, "learning_rate": 7.618585298196949e-06, "loss": 0.0557, "step": 3444 }, { "epoch": 2.386560443366817, "grad_norm": 0.6978826522827148, "learning_rate": 7.617891816920944e-06, "loss": 0.0556, "step": 3445 }, { "epoch": 2.3872532040180117, "grad_norm": 0.7304977774620056, "learning_rate": 7.617198335644939e-06, "loss": 0.0483, "step": 3446 }, { "epoch": 2.387945964669207, "grad_norm": 0.6925829648971558, "learning_rate": 7.616504854368933e-06, "loss": 0.0586, "step": 3447 }, { "epoch": 2.388638725320402, "grad_norm": 0.7023626565933228, "learning_rate": 7.615811373092928e-06, "loss": 0.068, "step": 3448 }, { "epoch": 2.3893314859715966, "grad_norm": 0.6978974938392639, "learning_rate": 7.615117891816921e-06, "loss": 0.0533, "step": 3449 }, { "epoch": 2.390024246622792, "grad_norm": 0.6212133169174194, "learning_rate": 7.614424410540916e-06, "loss": 0.048, "step": 3450 }, { "epoch": 2.3907170072739867, "grad_norm": 0.6057196259498596, "learning_rate": 7.613730929264911e-06, "loss": 0.0455, "step": 3451 }, { "epoch": 2.391409767925182, "grad_norm": 0.7019253373146057, "learning_rate": 7.613037447988905e-06, "loss": 0.05, "step": 3452 }, { "epoch": 2.392102528576377, "grad_norm": 0.6439147591590881, "learning_rate": 7.6123439667129e-06, "loss": 0.0482, "step": 3453 }, { "epoch": 2.392795289227572, "grad_norm": 0.7034726142883301, "learning_rate": 7.611650485436893e-06, "loss": 0.0513, "step": 3454 }, { "epoch": 2.393488049878767, "grad_norm": 0.576032280921936, "learning_rate": 7.610957004160888e-06, "loss": 0.0416, "step": 3455 }, { "epoch": 2.394180810529962, "grad_norm": 0.6888068914413452, "learning_rate": 7.610263522884883e-06, "loss": 0.0572, "step": 3456 }, { "epoch": 2.394873571181157, "grad_norm": 0.585434079170227, "learning_rate": 7.609570041608877e-06, "loss": 0.0412, "step": 3457 }, { "epoch": 2.395566331832352, "grad_norm": 0.6731674075126648, "learning_rate": 7.608876560332872e-06, "loss": 0.0625, "step": 3458 }, { "epoch": 2.3962590924835467, "grad_norm": 0.645865797996521, "learning_rate": 7.608183079056866e-06, "loss": 0.0574, "step": 3459 }, { "epoch": 2.396951853134742, "grad_norm": 0.7278096079826355, "learning_rate": 7.607489597780861e-06, "loss": 0.0531, "step": 3460 }, { "epoch": 2.397644613785937, "grad_norm": 0.692797064781189, "learning_rate": 7.606796116504855e-06, "loss": 0.0449, "step": 3461 }, { "epoch": 2.398337374437132, "grad_norm": 0.6844128370285034, "learning_rate": 7.606102635228849e-06, "loss": 0.0637, "step": 3462 }, { "epoch": 2.399030135088327, "grad_norm": 0.6277254223823547, "learning_rate": 7.605409153952844e-06, "loss": 0.0547, "step": 3463 }, { "epoch": 2.3997228957395222, "grad_norm": 0.6868218183517456, "learning_rate": 7.604715672676838e-06, "loss": 0.0573, "step": 3464 }, { "epoch": 2.400415656390717, "grad_norm": 0.9933450818061829, "learning_rate": 7.604022191400833e-06, "loss": 0.0793, "step": 3465 }, { "epoch": 2.401108417041912, "grad_norm": 0.6962992548942566, "learning_rate": 7.603328710124828e-06, "loss": 0.0528, "step": 3466 }, { "epoch": 2.401801177693107, "grad_norm": 0.770677387714386, "learning_rate": 7.602635228848821e-06, "loss": 0.0648, "step": 3467 }, { "epoch": 2.402493938344302, "grad_norm": 0.7920851111412048, "learning_rate": 7.601941747572816e-06, "loss": 0.0569, "step": 3468 }, { "epoch": 2.403186698995497, "grad_norm": 0.8551387786865234, "learning_rate": 7.60124826629681e-06, "loss": 0.0508, "step": 3469 }, { "epoch": 2.403879459646692, "grad_norm": 0.6823190450668335, "learning_rate": 7.600554785020805e-06, "loss": 0.051, "step": 3470 }, { "epoch": 2.404572220297887, "grad_norm": 0.7619594931602478, "learning_rate": 7.5998613037448e-06, "loss": 0.0685, "step": 3471 }, { "epoch": 2.4052649809490823, "grad_norm": 0.5996909141540527, "learning_rate": 7.599167822468793e-06, "loss": 0.0423, "step": 3472 }, { "epoch": 2.405957741600277, "grad_norm": 0.7393822073936462, "learning_rate": 7.598474341192788e-06, "loss": 0.0518, "step": 3473 }, { "epoch": 2.406650502251472, "grad_norm": 0.7016523480415344, "learning_rate": 7.597780859916782e-06, "loss": 0.0538, "step": 3474 }, { "epoch": 2.407343262902667, "grad_norm": 0.634170651435852, "learning_rate": 7.597087378640777e-06, "loss": 0.0527, "step": 3475 }, { "epoch": 2.408036023553862, "grad_norm": 0.7694522142410278, "learning_rate": 7.596393897364772e-06, "loss": 0.0574, "step": 3476 }, { "epoch": 2.4087287842050573, "grad_norm": 0.727978527545929, "learning_rate": 7.595700416088766e-06, "loss": 0.0494, "step": 3477 }, { "epoch": 2.409421544856252, "grad_norm": 0.6250091195106506, "learning_rate": 7.595006934812761e-06, "loss": 0.0441, "step": 3478 }, { "epoch": 2.410114305507447, "grad_norm": 0.7196205258369446, "learning_rate": 7.5943134535367545e-06, "loss": 0.0645, "step": 3479 }, { "epoch": 2.4108070661586423, "grad_norm": 0.8385190963745117, "learning_rate": 7.593619972260749e-06, "loss": 0.0679, "step": 3480 }, { "epoch": 2.411499826809837, "grad_norm": 1.0772920846939087, "learning_rate": 7.592926490984744e-06, "loss": 0.0838, "step": 3481 }, { "epoch": 2.4121925874610324, "grad_norm": 0.7560489773750305, "learning_rate": 7.592233009708738e-06, "loss": 0.0641, "step": 3482 }, { "epoch": 2.412885348112227, "grad_norm": 0.6982017159461975, "learning_rate": 7.591539528432733e-06, "loss": 0.0552, "step": 3483 }, { "epoch": 2.413578108763422, "grad_norm": 0.6059712171554565, "learning_rate": 7.590846047156727e-06, "loss": 0.0476, "step": 3484 }, { "epoch": 2.4142708694146173, "grad_norm": 0.7760432362556458, "learning_rate": 7.5901525658807215e-06, "loss": 0.0565, "step": 3485 }, { "epoch": 2.414963630065812, "grad_norm": 0.6153656840324402, "learning_rate": 7.5894590846047165e-06, "loss": 0.0528, "step": 3486 }, { "epoch": 2.4156563907170074, "grad_norm": 0.6630606055259705, "learning_rate": 7.5887656033287105e-06, "loss": 0.0459, "step": 3487 }, { "epoch": 2.4163491513682023, "grad_norm": 0.5501273274421692, "learning_rate": 7.5880721220527055e-06, "loss": 0.0417, "step": 3488 }, { "epoch": 2.417041912019397, "grad_norm": 0.6584319472312927, "learning_rate": 7.5873786407766996e-06, "loss": 0.0553, "step": 3489 }, { "epoch": 2.4177346726705924, "grad_norm": 0.7364506125450134, "learning_rate": 7.586685159500694e-06, "loss": 0.0626, "step": 3490 }, { "epoch": 2.4184274333217872, "grad_norm": 0.6536809802055359, "learning_rate": 7.585991678224689e-06, "loss": 0.0504, "step": 3491 }, { "epoch": 2.4191201939729825, "grad_norm": 0.6651002168655396, "learning_rate": 7.585298196948683e-06, "loss": 0.0484, "step": 3492 }, { "epoch": 2.4198129546241773, "grad_norm": 0.6081539988517761, "learning_rate": 7.584604715672678e-06, "loss": 0.0574, "step": 3493 }, { "epoch": 2.420505715275372, "grad_norm": 0.7257691621780396, "learning_rate": 7.583911234396672e-06, "loss": 0.0518, "step": 3494 }, { "epoch": 2.4211984759265675, "grad_norm": 0.742874026298523, "learning_rate": 7.583217753120667e-06, "loss": 0.0581, "step": 3495 }, { "epoch": 2.4218912365777623, "grad_norm": 0.6096038818359375, "learning_rate": 7.5825242718446616e-06, "loss": 0.054, "step": 3496 }, { "epoch": 2.4225839972289576, "grad_norm": 0.7022075057029724, "learning_rate": 7.581830790568655e-06, "loss": 0.0712, "step": 3497 }, { "epoch": 2.4232767578801524, "grad_norm": 0.665908694267273, "learning_rate": 7.58113730929265e-06, "loss": 0.0562, "step": 3498 }, { "epoch": 2.4239695185313472, "grad_norm": 0.8648689985275269, "learning_rate": 7.580443828016644e-06, "loss": 0.0747, "step": 3499 }, { "epoch": 2.4246622791825425, "grad_norm": 0.6802393198013306, "learning_rate": 7.579750346740639e-06, "loss": 0.0584, "step": 3500 }, { "epoch": 2.4253550398337373, "grad_norm": 0.732030987739563, "learning_rate": 7.579056865464634e-06, "loss": 0.0593, "step": 3501 }, { "epoch": 2.4260478004849326, "grad_norm": 0.7286328673362732, "learning_rate": 7.578363384188627e-06, "loss": 0.0488, "step": 3502 }, { "epoch": 2.4267405611361275, "grad_norm": 0.6514286994934082, "learning_rate": 7.577669902912622e-06, "loss": 0.046, "step": 3503 }, { "epoch": 2.4274333217873223, "grad_norm": 0.6425239443778992, "learning_rate": 7.576976421636616e-06, "loss": 0.0567, "step": 3504 }, { "epoch": 2.4281260824385176, "grad_norm": 0.621367871761322, "learning_rate": 7.576282940360611e-06, "loss": 0.0538, "step": 3505 }, { "epoch": 2.4288188430897124, "grad_norm": 0.848767876625061, "learning_rate": 7.575589459084606e-06, "loss": 0.0611, "step": 3506 }, { "epoch": 2.4295116037409077, "grad_norm": 0.7124624252319336, "learning_rate": 7.5748959778086e-06, "loss": 0.0577, "step": 3507 }, { "epoch": 2.4302043643921025, "grad_norm": 0.6805168390274048, "learning_rate": 7.574202496532595e-06, "loss": 0.0585, "step": 3508 }, { "epoch": 2.4308971250432974, "grad_norm": 0.6756210327148438, "learning_rate": 7.573509015256588e-06, "loss": 0.0473, "step": 3509 }, { "epoch": 2.4315898856944926, "grad_norm": 0.6795691251754761, "learning_rate": 7.572815533980583e-06, "loss": 0.0518, "step": 3510 }, { "epoch": 2.4322826463456875, "grad_norm": 0.6456634998321533, "learning_rate": 7.572122052704578e-06, "loss": 0.0489, "step": 3511 }, { "epoch": 2.4329754069968828, "grad_norm": 0.6561073064804077, "learning_rate": 7.571428571428572e-06, "loss": 0.0519, "step": 3512 }, { "epoch": 2.4336681676480776, "grad_norm": 0.8649124503135681, "learning_rate": 7.570735090152567e-06, "loss": 0.0494, "step": 3513 }, { "epoch": 2.4343609282992724, "grad_norm": 0.684529185295105, "learning_rate": 7.57004160887656e-06, "loss": 0.0573, "step": 3514 }, { "epoch": 2.4350536889504677, "grad_norm": 0.8203072547912598, "learning_rate": 7.569348127600555e-06, "loss": 0.0618, "step": 3515 }, { "epoch": 2.4357464496016625, "grad_norm": 0.69808030128479, "learning_rate": 7.56865464632455e-06, "loss": 0.0465, "step": 3516 }, { "epoch": 2.436439210252858, "grad_norm": 0.6848331093788147, "learning_rate": 7.567961165048544e-06, "loss": 0.0596, "step": 3517 }, { "epoch": 2.4371319709040526, "grad_norm": 0.7920529246330261, "learning_rate": 7.567267683772539e-06, "loss": 0.0633, "step": 3518 }, { "epoch": 2.4378247315552475, "grad_norm": 0.6909356117248535, "learning_rate": 7.566574202496532e-06, "loss": 0.0567, "step": 3519 }, { "epoch": 2.4385174922064428, "grad_norm": 0.6651289463043213, "learning_rate": 7.565880721220527e-06, "loss": 0.0432, "step": 3520 }, { "epoch": 2.4392102528576376, "grad_norm": 0.7031319737434387, "learning_rate": 7.565187239944522e-06, "loss": 0.0618, "step": 3521 }, { "epoch": 2.439903013508833, "grad_norm": 0.7217929363250732, "learning_rate": 7.564493758668516e-06, "loss": 0.0582, "step": 3522 }, { "epoch": 2.4405957741600277, "grad_norm": 0.7406728863716125, "learning_rate": 7.563800277392511e-06, "loss": 0.0606, "step": 3523 }, { "epoch": 2.4412885348112225, "grad_norm": 0.6231144070625305, "learning_rate": 7.563106796116505e-06, "loss": 0.0342, "step": 3524 }, { "epoch": 2.441981295462418, "grad_norm": 0.630927562713623, "learning_rate": 7.5624133148405e-06, "loss": 0.051, "step": 3525 }, { "epoch": 2.4426740561136127, "grad_norm": 0.720359206199646, "learning_rate": 7.561719833564495e-06, "loss": 0.0493, "step": 3526 }, { "epoch": 2.443366816764808, "grad_norm": 0.6691286563873291, "learning_rate": 7.561026352288488e-06, "loss": 0.058, "step": 3527 }, { "epoch": 2.4440595774160028, "grad_norm": 0.6614663600921631, "learning_rate": 7.560332871012483e-06, "loss": 0.0497, "step": 3528 }, { "epoch": 2.4447523380671976, "grad_norm": 0.7070098519325256, "learning_rate": 7.559639389736477e-06, "loss": 0.051, "step": 3529 }, { "epoch": 2.445445098718393, "grad_norm": 0.5920138359069824, "learning_rate": 7.558945908460472e-06, "loss": 0.0399, "step": 3530 }, { "epoch": 2.4461378593695877, "grad_norm": 0.659697413444519, "learning_rate": 7.558252427184467e-06, "loss": 0.0554, "step": 3531 }, { "epoch": 2.446830620020783, "grad_norm": 0.6952112317085266, "learning_rate": 7.5575589459084605e-06, "loss": 0.0539, "step": 3532 }, { "epoch": 2.447523380671978, "grad_norm": 0.7827281951904297, "learning_rate": 7.5568654646324555e-06, "loss": 0.0669, "step": 3533 }, { "epoch": 2.4482161413231727, "grad_norm": 0.708702027797699, "learning_rate": 7.5561719833564495e-06, "loss": 0.0476, "step": 3534 }, { "epoch": 2.448908901974368, "grad_norm": 0.8409815430641174, "learning_rate": 7.5554785020804445e-06, "loss": 0.0624, "step": 3535 }, { "epoch": 2.449601662625563, "grad_norm": 0.7902380228042603, "learning_rate": 7.554785020804439e-06, "loss": 0.0595, "step": 3536 }, { "epoch": 2.450294423276758, "grad_norm": 0.8560856580734253, "learning_rate": 7.5540915395284335e-06, "loss": 0.0623, "step": 3537 }, { "epoch": 2.450987183927953, "grad_norm": 0.7568857669830322, "learning_rate": 7.553398058252428e-06, "loss": 0.0548, "step": 3538 }, { "epoch": 2.4516799445791477, "grad_norm": 0.7374472618103027, "learning_rate": 7.552704576976422e-06, "loss": 0.0557, "step": 3539 }, { "epoch": 2.452372705230343, "grad_norm": 0.6479283571243286, "learning_rate": 7.552011095700417e-06, "loss": 0.0406, "step": 3540 }, { "epoch": 2.453065465881538, "grad_norm": 0.6632204055786133, "learning_rate": 7.5513176144244115e-06, "loss": 0.052, "step": 3541 }, { "epoch": 2.453758226532733, "grad_norm": 0.9688203930854797, "learning_rate": 7.550624133148406e-06, "loss": 0.0607, "step": 3542 }, { "epoch": 2.454450987183928, "grad_norm": 0.7763074040412903, "learning_rate": 7.5499306518724006e-06, "loss": 0.0615, "step": 3543 }, { "epoch": 2.455143747835123, "grad_norm": 0.7124184370040894, "learning_rate": 7.549237170596394e-06, "loss": 0.0607, "step": 3544 }, { "epoch": 2.455836508486318, "grad_norm": 0.669926106929779, "learning_rate": 7.548543689320389e-06, "loss": 0.0606, "step": 3545 }, { "epoch": 2.456529269137513, "grad_norm": 0.7063635587692261, "learning_rate": 7.547850208044384e-06, "loss": 0.0639, "step": 3546 }, { "epoch": 2.457222029788708, "grad_norm": 0.5827317833900452, "learning_rate": 7.547156726768378e-06, "loss": 0.0422, "step": 3547 }, { "epoch": 2.457914790439903, "grad_norm": 0.6933485865592957, "learning_rate": 7.546463245492373e-06, "loss": 0.0615, "step": 3548 }, { "epoch": 2.458607551091098, "grad_norm": 0.6758912205696106, "learning_rate": 7.545769764216366e-06, "loss": 0.0537, "step": 3549 }, { "epoch": 2.459300311742293, "grad_norm": 0.6951971650123596, "learning_rate": 7.545076282940361e-06, "loss": 0.06, "step": 3550 }, { "epoch": 2.459993072393488, "grad_norm": 0.8742147088050842, "learning_rate": 7.544382801664356e-06, "loss": 0.0611, "step": 3551 }, { "epoch": 2.4606858330446832, "grad_norm": 0.7043006420135498, "learning_rate": 7.54368932038835e-06, "loss": 0.0603, "step": 3552 }, { "epoch": 2.461378593695878, "grad_norm": 0.6522670984268188, "learning_rate": 7.542995839112345e-06, "loss": 0.0483, "step": 3553 }, { "epoch": 2.462071354347073, "grad_norm": 0.589442253112793, "learning_rate": 7.542302357836339e-06, "loss": 0.0461, "step": 3554 }, { "epoch": 2.462764114998268, "grad_norm": 0.7959421873092651, "learning_rate": 7.541608876560334e-06, "loss": 0.0654, "step": 3555 }, { "epoch": 2.463456875649463, "grad_norm": 0.7357652187347412, "learning_rate": 7.540915395284329e-06, "loss": 0.0584, "step": 3556 }, { "epoch": 2.4641496363006583, "grad_norm": 0.7577213644981384, "learning_rate": 7.540221914008322e-06, "loss": 0.0496, "step": 3557 }, { "epoch": 2.464842396951853, "grad_norm": 1.2063661813735962, "learning_rate": 7.539528432732317e-06, "loss": 0.0624, "step": 3558 }, { "epoch": 2.465535157603048, "grad_norm": 0.594376802444458, "learning_rate": 7.538834951456311e-06, "loss": 0.0562, "step": 3559 }, { "epoch": 2.4662279182542433, "grad_norm": 0.6683982014656067, "learning_rate": 7.538141470180306e-06, "loss": 0.0545, "step": 3560 }, { "epoch": 2.466920678905438, "grad_norm": 0.8373034000396729, "learning_rate": 7.537447988904301e-06, "loss": 0.0618, "step": 3561 }, { "epoch": 2.4676134395566334, "grad_norm": 0.6339336037635803, "learning_rate": 7.536754507628294e-06, "loss": 0.0471, "step": 3562 }, { "epoch": 2.468306200207828, "grad_norm": 0.6370022892951965, "learning_rate": 7.536061026352289e-06, "loss": 0.0495, "step": 3563 }, { "epoch": 2.468998960859023, "grad_norm": 0.8200846314430237, "learning_rate": 7.535367545076283e-06, "loss": 0.069, "step": 3564 }, { "epoch": 2.4696917215102183, "grad_norm": 0.7583621144294739, "learning_rate": 7.534674063800278e-06, "loss": 0.0568, "step": 3565 }, { "epoch": 2.470384482161413, "grad_norm": 0.6182228922843933, "learning_rate": 7.533980582524273e-06, "loss": 0.0449, "step": 3566 }, { "epoch": 2.4710772428126084, "grad_norm": 0.7657793760299683, "learning_rate": 7.533287101248266e-06, "loss": 0.0542, "step": 3567 }, { "epoch": 2.4717700034638033, "grad_norm": 0.6714324951171875, "learning_rate": 7.532593619972261e-06, "loss": 0.0574, "step": 3568 }, { "epoch": 2.472462764114998, "grad_norm": 0.6751946210861206, "learning_rate": 7.531900138696255e-06, "loss": 0.0423, "step": 3569 }, { "epoch": 2.4731555247661934, "grad_norm": 0.7521834969520569, "learning_rate": 7.53120665742025e-06, "loss": 0.0585, "step": 3570 }, { "epoch": 2.473848285417388, "grad_norm": 0.6951082348823547, "learning_rate": 7.530513176144245e-06, "loss": 0.052, "step": 3571 }, { "epoch": 2.4745410460685835, "grad_norm": 0.7604899406433105, "learning_rate": 7.529819694868239e-06, "loss": 0.0554, "step": 3572 }, { "epoch": 2.4752338067197783, "grad_norm": 0.5871224403381348, "learning_rate": 7.529126213592234e-06, "loss": 0.0429, "step": 3573 }, { "epoch": 2.475926567370973, "grad_norm": 0.7135995626449585, "learning_rate": 7.528432732316227e-06, "loss": 0.0427, "step": 3574 }, { "epoch": 2.4766193280221684, "grad_norm": 0.5908096432685852, "learning_rate": 7.527739251040222e-06, "loss": 0.0432, "step": 3575 }, { "epoch": 2.4773120886733633, "grad_norm": 0.7182779908180237, "learning_rate": 7.527045769764217e-06, "loss": 0.0605, "step": 3576 }, { "epoch": 2.4780048493245586, "grad_norm": 0.7429741621017456, "learning_rate": 7.526352288488211e-06, "loss": 0.0562, "step": 3577 }, { "epoch": 2.4786976099757534, "grad_norm": 0.5671314001083374, "learning_rate": 7.525658807212206e-06, "loss": 0.0435, "step": 3578 }, { "epoch": 2.4793903706269482, "grad_norm": 0.7295961380004883, "learning_rate": 7.5249653259361995e-06, "loss": 0.0536, "step": 3579 }, { "epoch": 2.4800831312781435, "grad_norm": 0.7264970541000366, "learning_rate": 7.5242718446601945e-06, "loss": 0.0673, "step": 3580 }, { "epoch": 2.4807758919293383, "grad_norm": 0.6332563757896423, "learning_rate": 7.523578363384189e-06, "loss": 0.0436, "step": 3581 }, { "epoch": 2.4814686525805336, "grad_norm": 0.6721775531768799, "learning_rate": 7.5228848821081835e-06, "loss": 0.0566, "step": 3582 }, { "epoch": 2.4821614132317285, "grad_norm": 0.7073386311531067, "learning_rate": 7.522191400832178e-06, "loss": 0.0516, "step": 3583 }, { "epoch": 2.4828541738829233, "grad_norm": 0.6986554265022278, "learning_rate": 7.5214979195561725e-06, "loss": 0.0488, "step": 3584 }, { "epoch": 2.4835469345341186, "grad_norm": 0.5938754677772522, "learning_rate": 7.5208044382801674e-06, "loss": 0.0517, "step": 3585 }, { "epoch": 2.4842396951853134, "grad_norm": 0.8253476619720459, "learning_rate": 7.5201109570041615e-06, "loss": 0.0713, "step": 3586 }, { "epoch": 2.4849324558365087, "grad_norm": 0.688849925994873, "learning_rate": 7.519417475728156e-06, "loss": 0.0493, "step": 3587 }, { "epoch": 2.4856252164877035, "grad_norm": 0.8110700249671936, "learning_rate": 7.5187239944521505e-06, "loss": 0.078, "step": 3588 }, { "epoch": 2.4863179771388983, "grad_norm": 0.7634989023208618, "learning_rate": 7.518030513176145e-06, "loss": 0.0565, "step": 3589 }, { "epoch": 2.4870107377900936, "grad_norm": 0.7390944361686707, "learning_rate": 7.5173370319001396e-06, "loss": 0.0532, "step": 3590 }, { "epoch": 2.4877034984412885, "grad_norm": 0.6951395869255066, "learning_rate": 7.5166435506241345e-06, "loss": 0.0501, "step": 3591 }, { "epoch": 2.4883962590924833, "grad_norm": 0.600458025932312, "learning_rate": 7.515950069348128e-06, "loss": 0.0449, "step": 3592 }, { "epoch": 2.4890890197436786, "grad_norm": 0.6846388578414917, "learning_rate": 7.515256588072123e-06, "loss": 0.0462, "step": 3593 }, { "epoch": 2.4897817803948734, "grad_norm": 0.589938223361969, "learning_rate": 7.514563106796117e-06, "loss": 0.0467, "step": 3594 }, { "epoch": 2.4904745410460687, "grad_norm": 0.6740636229515076, "learning_rate": 7.513869625520112e-06, "loss": 0.046, "step": 3595 }, { "epoch": 2.4911673016972635, "grad_norm": 0.6663030982017517, "learning_rate": 7.513176144244107e-06, "loss": 0.042, "step": 3596 }, { "epoch": 2.491860062348459, "grad_norm": 0.6415688991546631, "learning_rate": 7.5124826629681e-06, "loss": 0.0496, "step": 3597 }, { "epoch": 2.4925528229996536, "grad_norm": 0.6303988099098206, "learning_rate": 7.511789181692095e-06, "loss": 0.0506, "step": 3598 }, { "epoch": 2.4932455836508485, "grad_norm": 0.7231244444847107, "learning_rate": 7.511095700416089e-06, "loss": 0.0604, "step": 3599 }, { "epoch": 2.4939383443020438, "grad_norm": 0.7289586663246155, "learning_rate": 7.510402219140084e-06, "loss": 0.055, "step": 3600 }, { "epoch": 2.4946311049532386, "grad_norm": 0.8104788661003113, "learning_rate": 7.509708737864079e-06, "loss": 0.0504, "step": 3601 }, { "epoch": 2.4953238656044334, "grad_norm": 0.6668550968170166, "learning_rate": 7.509015256588073e-06, "loss": 0.0458, "step": 3602 }, { "epoch": 2.4960166262556287, "grad_norm": 0.7586086988449097, "learning_rate": 7.508321775312068e-06, "loss": 0.0694, "step": 3603 }, { "epoch": 2.4967093869068235, "grad_norm": 0.6411376595497131, "learning_rate": 7.507628294036061e-06, "loss": 0.0539, "step": 3604 }, { "epoch": 2.497402147558019, "grad_norm": 0.8673600554466248, "learning_rate": 7.506934812760056e-06, "loss": 0.0636, "step": 3605 }, { "epoch": 2.4980949082092136, "grad_norm": 0.6442834734916687, "learning_rate": 7.506241331484051e-06, "loss": 0.0542, "step": 3606 }, { "epoch": 2.498787668860409, "grad_norm": 0.702248215675354, "learning_rate": 7.505547850208045e-06, "loss": 0.0582, "step": 3607 }, { "epoch": 2.4994804295116038, "grad_norm": 0.7120597958564758, "learning_rate": 7.50485436893204e-06, "loss": 0.0658, "step": 3608 }, { "epoch": 2.5001731901627986, "grad_norm": 0.8212564587593079, "learning_rate": 7.504160887656033e-06, "loss": 0.0655, "step": 3609 }, { "epoch": 2.500865950813994, "grad_norm": 0.6543794870376587, "learning_rate": 7.503467406380028e-06, "loss": 0.0594, "step": 3610 }, { "epoch": 2.5015587114651887, "grad_norm": 0.6933072209358215, "learning_rate": 7.502773925104023e-06, "loss": 0.0632, "step": 3611 }, { "epoch": 2.5022514721163835, "grad_norm": 0.6953290104866028, "learning_rate": 7.502080443828017e-06, "loss": 0.0547, "step": 3612 }, { "epoch": 2.502944232767579, "grad_norm": 0.6507391929626465, "learning_rate": 7.501386962552012e-06, "loss": 0.0456, "step": 3613 }, { "epoch": 2.5036369934187737, "grad_norm": 0.6569274067878723, "learning_rate": 7.500693481276006e-06, "loss": 0.0493, "step": 3614 }, { "epoch": 2.504329754069969, "grad_norm": 0.6933046579360962, "learning_rate": 7.500000000000001e-06, "loss": 0.0638, "step": 3615 }, { "epoch": 2.5050225147211638, "grad_norm": 0.6412826776504517, "learning_rate": 7.499306518723995e-06, "loss": 0.0477, "step": 3616 }, { "epoch": 2.505715275372359, "grad_norm": 0.7654886841773987, "learning_rate": 7.498613037447989e-06, "loss": 0.0509, "step": 3617 }, { "epoch": 2.506408036023554, "grad_norm": 0.679672122001648, "learning_rate": 7.497919556171984e-06, "loss": 0.0534, "step": 3618 }, { "epoch": 2.5071007966747487, "grad_norm": 0.6829155683517456, "learning_rate": 7.497226074895978e-06, "loss": 0.0584, "step": 3619 }, { "epoch": 2.507793557325944, "grad_norm": 0.8345833420753479, "learning_rate": 7.496532593619973e-06, "loss": 0.0678, "step": 3620 }, { "epoch": 2.508486317977139, "grad_norm": 0.6249558925628662, "learning_rate": 7.495839112343968e-06, "loss": 0.046, "step": 3621 }, { "epoch": 2.5091790786283337, "grad_norm": 0.7350931763648987, "learning_rate": 7.495145631067961e-06, "loss": 0.0654, "step": 3622 }, { "epoch": 2.509871839279529, "grad_norm": 0.6759945154190063, "learning_rate": 7.494452149791956e-06, "loss": 0.0645, "step": 3623 }, { "epoch": 2.510564599930724, "grad_norm": 0.6918412446975708, "learning_rate": 7.49375866851595e-06, "loss": 0.0437, "step": 3624 }, { "epoch": 2.511257360581919, "grad_norm": 0.7094055414199829, "learning_rate": 7.493065187239945e-06, "loss": 0.0638, "step": 3625 }, { "epoch": 2.511950121233114, "grad_norm": 0.5930887460708618, "learning_rate": 7.49237170596394e-06, "loss": 0.0402, "step": 3626 }, { "epoch": 2.512642881884309, "grad_norm": 0.912276566028595, "learning_rate": 7.4916782246879335e-06, "loss": 0.0678, "step": 3627 }, { "epoch": 2.513335642535504, "grad_norm": 0.654692530632019, "learning_rate": 7.490984743411928e-06, "loss": 0.0532, "step": 3628 }, { "epoch": 2.514028403186699, "grad_norm": 0.6371455788612366, "learning_rate": 7.4902912621359225e-06, "loss": 0.0547, "step": 3629 }, { "epoch": 2.514721163837894, "grad_norm": 0.7303754091262817, "learning_rate": 7.489597780859917e-06, "loss": 0.0495, "step": 3630 }, { "epoch": 2.515413924489089, "grad_norm": 0.8115444183349609, "learning_rate": 7.488904299583912e-06, "loss": 0.0718, "step": 3631 }, { "epoch": 2.516106685140284, "grad_norm": 0.7716137766838074, "learning_rate": 7.4882108183079064e-06, "loss": 0.0735, "step": 3632 }, { "epoch": 2.516799445791479, "grad_norm": 0.6851175427436829, "learning_rate": 7.487517337031901e-06, "loss": 0.0537, "step": 3633 }, { "epoch": 2.517492206442674, "grad_norm": 0.6670947670936584, "learning_rate": 7.486823855755895e-06, "loss": 0.0482, "step": 3634 }, { "epoch": 2.518184967093869, "grad_norm": 0.6720465421676636, "learning_rate": 7.4861303744798895e-06, "loss": 0.0443, "step": 3635 }, { "epoch": 2.518877727745064, "grad_norm": 0.6166452169418335, "learning_rate": 7.4854368932038845e-06, "loss": 0.0526, "step": 3636 }, { "epoch": 2.5195704883962593, "grad_norm": 0.6722489595413208, "learning_rate": 7.4847434119278786e-06, "loss": 0.0406, "step": 3637 }, { "epoch": 2.520263249047454, "grad_norm": 0.7445875406265259, "learning_rate": 7.4840499306518735e-06, "loss": 0.0636, "step": 3638 }, { "epoch": 2.520956009698649, "grad_norm": 0.7646205425262451, "learning_rate": 7.483356449375867e-06, "loss": 0.0542, "step": 3639 }, { "epoch": 2.5216487703498442, "grad_norm": 0.6960675716400146, "learning_rate": 7.482662968099862e-06, "loss": 0.0592, "step": 3640 }, { "epoch": 2.522341531001039, "grad_norm": 0.7878941893577576, "learning_rate": 7.481969486823857e-06, "loss": 0.0593, "step": 3641 }, { "epoch": 2.523034291652234, "grad_norm": 0.8090975880622864, "learning_rate": 7.481276005547851e-06, "loss": 0.0661, "step": 3642 }, { "epoch": 2.523727052303429, "grad_norm": 0.6913262009620667, "learning_rate": 7.480582524271846e-06, "loss": 0.0477, "step": 3643 }, { "epoch": 2.524419812954624, "grad_norm": 0.6624709367752075, "learning_rate": 7.47988904299584e-06, "loss": 0.0413, "step": 3644 }, { "epoch": 2.5251125736058193, "grad_norm": 0.863525927066803, "learning_rate": 7.479195561719834e-06, "loss": 0.0467, "step": 3645 }, { "epoch": 2.525805334257014, "grad_norm": 0.6592664122581482, "learning_rate": 7.478502080443829e-06, "loss": 0.0459, "step": 3646 }, { "epoch": 2.5264980949082094, "grad_norm": 0.6367437243461609, "learning_rate": 7.477808599167823e-06, "loss": 0.036, "step": 3647 }, { "epoch": 2.5271908555594043, "grad_norm": 0.7011104822158813, "learning_rate": 7.477115117891818e-06, "loss": 0.0436, "step": 3648 }, { "epoch": 2.527883616210599, "grad_norm": 0.7507541179656982, "learning_rate": 7.476421636615812e-06, "loss": 0.0488, "step": 3649 }, { "epoch": 2.5285763768617944, "grad_norm": 0.7038018703460693, "learning_rate": 7.475728155339807e-06, "loss": 0.0369, "step": 3650 }, { "epoch": 2.529269137512989, "grad_norm": 0.7123277187347412, "learning_rate": 7.475034674063802e-06, "loss": 0.052, "step": 3651 }, { "epoch": 2.529961898164184, "grad_norm": 0.6427280306816101, "learning_rate": 7.474341192787795e-06, "loss": 0.0437, "step": 3652 }, { "epoch": 2.5306546588153793, "grad_norm": 0.6419147849082947, "learning_rate": 7.47364771151179e-06, "loss": 0.048, "step": 3653 }, { "epoch": 2.531347419466574, "grad_norm": 0.7547383308410645, "learning_rate": 7.472954230235784e-06, "loss": 0.0609, "step": 3654 }, { "epoch": 2.5320401801177694, "grad_norm": 0.748364269733429, "learning_rate": 7.472260748959779e-06, "loss": 0.0601, "step": 3655 }, { "epoch": 2.5327329407689643, "grad_norm": 0.7975109219551086, "learning_rate": 7.471567267683774e-06, "loss": 0.0582, "step": 3656 }, { "epoch": 2.5334257014201595, "grad_norm": 0.7377816438674927, "learning_rate": 7.470873786407767e-06, "loss": 0.0607, "step": 3657 }, { "epoch": 2.5341184620713544, "grad_norm": 0.7805709838867188, "learning_rate": 7.470180305131762e-06, "loss": 0.0727, "step": 3658 }, { "epoch": 2.534811222722549, "grad_norm": 0.7987939119338989, "learning_rate": 7.469486823855756e-06, "loss": 0.0602, "step": 3659 }, { "epoch": 2.5355039833737445, "grad_norm": 0.7342275381088257, "learning_rate": 7.468793342579751e-06, "loss": 0.0633, "step": 3660 }, { "epoch": 2.5361967440249393, "grad_norm": 0.7222427129745483, "learning_rate": 7.468099861303746e-06, "loss": 0.0514, "step": 3661 }, { "epoch": 2.536889504676134, "grad_norm": 0.7093556523323059, "learning_rate": 7.46740638002774e-06, "loss": 0.06, "step": 3662 }, { "epoch": 2.5375822653273294, "grad_norm": 0.7948033809661865, "learning_rate": 7.466712898751735e-06, "loss": 0.0564, "step": 3663 }, { "epoch": 2.5382750259785243, "grad_norm": 0.7399566173553467, "learning_rate": 7.466019417475728e-06, "loss": 0.0563, "step": 3664 }, { "epoch": 2.5389677866297196, "grad_norm": 0.7637043595314026, "learning_rate": 7.465325936199723e-06, "loss": 0.0689, "step": 3665 }, { "epoch": 2.5396605472809144, "grad_norm": 0.657764732837677, "learning_rate": 7.464632454923718e-06, "loss": 0.0432, "step": 3666 }, { "epoch": 2.5403533079321097, "grad_norm": 0.6431865096092224, "learning_rate": 7.463938973647712e-06, "loss": 0.0391, "step": 3667 }, { "epoch": 2.5410460685833045, "grad_norm": 0.6540981531143188, "learning_rate": 7.463245492371707e-06, "loss": 0.0447, "step": 3668 }, { "epoch": 2.5417388292344993, "grad_norm": 0.6576544046401978, "learning_rate": 7.4625520110957e-06, "loss": 0.0508, "step": 3669 }, { "epoch": 2.5424315898856946, "grad_norm": 0.8123604655265808, "learning_rate": 7.461858529819695e-06, "loss": 0.0706, "step": 3670 }, { "epoch": 2.5431243505368895, "grad_norm": 0.8841568231582642, "learning_rate": 7.46116504854369e-06, "loss": 0.0467, "step": 3671 }, { "epoch": 2.5438171111880843, "grad_norm": 0.7083867788314819, "learning_rate": 7.460471567267684e-06, "loss": 0.0654, "step": 3672 }, { "epoch": 2.5445098718392796, "grad_norm": 0.6732116341590881, "learning_rate": 7.459778085991679e-06, "loss": 0.0502, "step": 3673 }, { "epoch": 2.5452026324904744, "grad_norm": 0.6401447653770447, "learning_rate": 7.4590846047156725e-06, "loss": 0.04, "step": 3674 }, { "epoch": 2.5458953931416697, "grad_norm": 0.6104236245155334, "learning_rate": 7.458391123439667e-06, "loss": 0.0427, "step": 3675 }, { "epoch": 2.5465881537928645, "grad_norm": 0.9199113845825195, "learning_rate": 7.457697642163662e-06, "loss": 0.0601, "step": 3676 }, { "epoch": 2.54728091444406, "grad_norm": 0.7299613952636719, "learning_rate": 7.457004160887656e-06, "loss": 0.0601, "step": 3677 }, { "epoch": 2.5479736750952546, "grad_norm": 0.678725004196167, "learning_rate": 7.456310679611651e-06, "loss": 0.0492, "step": 3678 }, { "epoch": 2.5486664357464495, "grad_norm": 0.7396339178085327, "learning_rate": 7.4556171983356454e-06, "loss": 0.0465, "step": 3679 }, { "epoch": 2.5493591963976447, "grad_norm": 0.7016897797584534, "learning_rate": 7.45492371705964e-06, "loss": 0.0634, "step": 3680 }, { "epoch": 2.5500519570488396, "grad_norm": 0.6942267417907715, "learning_rate": 7.454230235783635e-06, "loss": 0.0471, "step": 3681 }, { "epoch": 2.5507447177000344, "grad_norm": 0.6844359040260315, "learning_rate": 7.4535367545076285e-06, "loss": 0.0564, "step": 3682 }, { "epoch": 2.5514374783512297, "grad_norm": 0.6959248781204224, "learning_rate": 7.4528432732316235e-06, "loss": 0.0445, "step": 3683 }, { "epoch": 2.5521302390024245, "grad_norm": 0.6269494891166687, "learning_rate": 7.4521497919556176e-06, "loss": 0.0393, "step": 3684 }, { "epoch": 2.55282299965362, "grad_norm": 0.6538231372833252, "learning_rate": 7.4514563106796125e-06, "loss": 0.055, "step": 3685 }, { "epoch": 2.5535157603048146, "grad_norm": 0.9394417405128479, "learning_rate": 7.4507628294036074e-06, "loss": 0.0826, "step": 3686 }, { "epoch": 2.55420852095601, "grad_norm": 0.7278565168380737, "learning_rate": 7.450069348127601e-06, "loss": 0.0533, "step": 3687 }, { "epoch": 2.5549012816072048, "grad_norm": 0.6613569259643555, "learning_rate": 7.449375866851596e-06, "loss": 0.0539, "step": 3688 }, { "epoch": 2.5555940422583996, "grad_norm": 0.7118561863899231, "learning_rate": 7.44868238557559e-06, "loss": 0.0542, "step": 3689 }, { "epoch": 2.556286802909595, "grad_norm": 0.61842280626297, "learning_rate": 7.447988904299585e-06, "loss": 0.0453, "step": 3690 }, { "epoch": 2.5569795635607897, "grad_norm": 0.6063533425331116, "learning_rate": 7.4472954230235796e-06, "loss": 0.0535, "step": 3691 }, { "epoch": 2.5576723242119845, "grad_norm": 0.7033942937850952, "learning_rate": 7.446601941747574e-06, "loss": 0.0534, "step": 3692 }, { "epoch": 2.55836508486318, "grad_norm": 0.6714750528335571, "learning_rate": 7.445908460471568e-06, "loss": 0.0581, "step": 3693 }, { "epoch": 2.5590578455143747, "grad_norm": 0.707249641418457, "learning_rate": 7.445214979195562e-06, "loss": 0.0593, "step": 3694 }, { "epoch": 2.55975060616557, "grad_norm": 0.6404327750205994, "learning_rate": 7.444521497919557e-06, "loss": 0.0448, "step": 3695 }, { "epoch": 2.5604433668167648, "grad_norm": 0.6509929299354553, "learning_rate": 7.443828016643552e-06, "loss": 0.0489, "step": 3696 }, { "epoch": 2.56113612746796, "grad_norm": 0.6667312383651733, "learning_rate": 7.443134535367546e-06, "loss": 0.0476, "step": 3697 }, { "epoch": 2.561828888119155, "grad_norm": 0.6964579224586487, "learning_rate": 7.442441054091541e-06, "loss": 0.0631, "step": 3698 }, { "epoch": 2.5625216487703497, "grad_norm": 0.6812249422073364, "learning_rate": 7.441747572815534e-06, "loss": 0.0583, "step": 3699 }, { "epoch": 2.563214409421545, "grad_norm": 0.6796555519104004, "learning_rate": 7.441054091539529e-06, "loss": 0.0618, "step": 3700 }, { "epoch": 2.56390717007274, "grad_norm": 0.7967904210090637, "learning_rate": 7.440360610263524e-06, "loss": 0.0562, "step": 3701 }, { "epoch": 2.5645999307239347, "grad_norm": 0.7128233909606934, "learning_rate": 7.439667128987518e-06, "loss": 0.0566, "step": 3702 }, { "epoch": 2.56529269137513, "grad_norm": 0.7470800280570984, "learning_rate": 7.438973647711513e-06, "loss": 0.062, "step": 3703 }, { "epoch": 2.5659854520263248, "grad_norm": 0.6518545150756836, "learning_rate": 7.438280166435506e-06, "loss": 0.051, "step": 3704 }, { "epoch": 2.56667821267752, "grad_norm": 0.6771643757820129, "learning_rate": 7.437586685159501e-06, "loss": 0.0537, "step": 3705 }, { "epoch": 2.567370973328715, "grad_norm": 0.6569119691848755, "learning_rate": 7.436893203883496e-06, "loss": 0.0452, "step": 3706 }, { "epoch": 2.56806373397991, "grad_norm": 0.7679269909858704, "learning_rate": 7.43619972260749e-06, "loss": 0.0578, "step": 3707 }, { "epoch": 2.568756494631105, "grad_norm": 0.7990003228187561, "learning_rate": 7.435506241331485e-06, "loss": 0.0688, "step": 3708 }, { "epoch": 2.5694492552823, "grad_norm": 0.7669041156768799, "learning_rate": 7.434812760055479e-06, "loss": 0.0665, "step": 3709 }, { "epoch": 2.570142015933495, "grad_norm": 0.6859337091445923, "learning_rate": 7.434119278779474e-06, "loss": 0.0571, "step": 3710 }, { "epoch": 2.57083477658469, "grad_norm": 0.6683074831962585, "learning_rate": 7.433425797503469e-06, "loss": 0.0491, "step": 3711 }, { "epoch": 2.571527537235885, "grad_norm": 0.6565935611724854, "learning_rate": 7.432732316227462e-06, "loss": 0.0554, "step": 3712 }, { "epoch": 2.57222029788708, "grad_norm": 0.7693780660629272, "learning_rate": 7.432038834951457e-06, "loss": 0.0686, "step": 3713 }, { "epoch": 2.572913058538275, "grad_norm": 0.7004335522651672, "learning_rate": 7.431345353675451e-06, "loss": 0.0581, "step": 3714 }, { "epoch": 2.57360581918947, "grad_norm": 0.6624301075935364, "learning_rate": 7.430651872399446e-06, "loss": 0.0513, "step": 3715 }, { "epoch": 2.574298579840665, "grad_norm": 0.6729522943496704, "learning_rate": 7.429958391123441e-06, "loss": 0.0477, "step": 3716 }, { "epoch": 2.5749913404918603, "grad_norm": 0.7227064967155457, "learning_rate": 7.429264909847434e-06, "loss": 0.0543, "step": 3717 }, { "epoch": 2.575684101143055, "grad_norm": 0.7444384694099426, "learning_rate": 7.428571428571429e-06, "loss": 0.0491, "step": 3718 }, { "epoch": 2.57637686179425, "grad_norm": 0.7027382850646973, "learning_rate": 7.427877947295423e-06, "loss": 0.0582, "step": 3719 }, { "epoch": 2.5770696224454452, "grad_norm": 0.7209217548370361, "learning_rate": 7.427184466019418e-06, "loss": 0.0618, "step": 3720 }, { "epoch": 2.57776238309664, "grad_norm": 0.6173632740974426, "learning_rate": 7.426490984743413e-06, "loss": 0.0598, "step": 3721 }, { "epoch": 2.578455143747835, "grad_norm": 0.8661364316940308, "learning_rate": 7.425797503467406e-06, "loss": 0.0627, "step": 3722 }, { "epoch": 2.57914790439903, "grad_norm": 0.7888517379760742, "learning_rate": 7.425104022191401e-06, "loss": 0.074, "step": 3723 }, { "epoch": 2.579840665050225, "grad_norm": 0.6954256296157837, "learning_rate": 7.424410540915395e-06, "loss": 0.0542, "step": 3724 }, { "epoch": 2.5805334257014203, "grad_norm": 1.132676362991333, "learning_rate": 7.42371705963939e-06, "loss": 0.0602, "step": 3725 }, { "epoch": 2.581226186352615, "grad_norm": 0.7591542601585388, "learning_rate": 7.423023578363385e-06, "loss": 0.0621, "step": 3726 }, { "epoch": 2.5819189470038104, "grad_norm": 0.6864979267120361, "learning_rate": 7.422330097087379e-06, "loss": 0.0474, "step": 3727 }, { "epoch": 2.5826117076550053, "grad_norm": 0.6854942440986633, "learning_rate": 7.421636615811374e-06, "loss": 0.0481, "step": 3728 }, { "epoch": 2.5833044683062, "grad_norm": 0.8545223474502563, "learning_rate": 7.4209431345353675e-06, "loss": 0.0613, "step": 3729 }, { "epoch": 2.5839972289573954, "grad_norm": 0.699947714805603, "learning_rate": 7.4202496532593625e-06, "loss": 0.0574, "step": 3730 }, { "epoch": 2.58468998960859, "grad_norm": 0.7740626931190491, "learning_rate": 7.419556171983357e-06, "loss": 0.0513, "step": 3731 }, { "epoch": 2.585382750259785, "grad_norm": 0.6306399703025818, "learning_rate": 7.4188626907073515e-06, "loss": 0.047, "step": 3732 }, { "epoch": 2.5860755109109803, "grad_norm": 0.7971992492675781, "learning_rate": 7.4181692094313464e-06, "loss": 0.0618, "step": 3733 }, { "epoch": 2.586768271562175, "grad_norm": 0.7833073735237122, "learning_rate": 7.41747572815534e-06, "loss": 0.0512, "step": 3734 }, { "epoch": 2.5874610322133704, "grad_norm": 0.6905516386032104, "learning_rate": 7.416782246879335e-06, "loss": 0.0526, "step": 3735 }, { "epoch": 2.5881537928645653, "grad_norm": 0.8069033622741699, "learning_rate": 7.4160887656033295e-06, "loss": 0.0674, "step": 3736 }, { "epoch": 2.5888465535157605, "grad_norm": 0.7322914600372314, "learning_rate": 7.415395284327324e-06, "loss": 0.0582, "step": 3737 }, { "epoch": 2.5895393141669554, "grad_norm": 0.6214407682418823, "learning_rate": 7.4147018030513186e-06, "loss": 0.0344, "step": 3738 }, { "epoch": 2.59023207481815, "grad_norm": 0.7403656840324402, "learning_rate": 7.414008321775313e-06, "loss": 0.0589, "step": 3739 }, { "epoch": 2.5909248354693455, "grad_norm": 0.6851581931114197, "learning_rate": 7.4133148404993076e-06, "loss": 0.061, "step": 3740 }, { "epoch": 2.5916175961205403, "grad_norm": 0.6936320066452026, "learning_rate": 7.412621359223302e-06, "loss": 0.0521, "step": 3741 }, { "epoch": 2.592310356771735, "grad_norm": 0.7396749258041382, "learning_rate": 7.411927877947296e-06, "loss": 0.0614, "step": 3742 }, { "epoch": 2.5930031174229304, "grad_norm": 0.6849899291992188, "learning_rate": 7.411234396671291e-06, "loss": 0.05, "step": 3743 }, { "epoch": 2.5936958780741253, "grad_norm": 0.8668308854103088, "learning_rate": 7.410540915395285e-06, "loss": 0.0688, "step": 3744 }, { "epoch": 2.5943886387253206, "grad_norm": 0.7260075211524963, "learning_rate": 7.40984743411928e-06, "loss": 0.064, "step": 3745 }, { "epoch": 2.5950813993765154, "grad_norm": 0.8617621660232544, "learning_rate": 7.409153952843275e-06, "loss": 0.0644, "step": 3746 }, { "epoch": 2.5957741600277107, "grad_norm": 0.6431972980499268, "learning_rate": 7.408460471567268e-06, "loss": 0.0535, "step": 3747 }, { "epoch": 2.5964669206789055, "grad_norm": 0.7590421438217163, "learning_rate": 7.407766990291263e-06, "loss": 0.0529, "step": 3748 }, { "epoch": 2.5971596813301003, "grad_norm": 0.8376240134239197, "learning_rate": 7.407073509015257e-06, "loss": 0.0634, "step": 3749 }, { "epoch": 2.5978524419812956, "grad_norm": 0.5832990407943726, "learning_rate": 7.406380027739252e-06, "loss": 0.0357, "step": 3750 }, { "epoch": 2.5985452026324904, "grad_norm": 0.6788563132286072, "learning_rate": 7.405686546463247e-06, "loss": 0.0624, "step": 3751 }, { "epoch": 2.5992379632836853, "grad_norm": 0.8296613097190857, "learning_rate": 7.40499306518724e-06, "loss": 0.0553, "step": 3752 }, { "epoch": 2.5999307239348806, "grad_norm": 0.7007006406784058, "learning_rate": 7.404299583911235e-06, "loss": 0.0465, "step": 3753 }, { "epoch": 2.6006234845860754, "grad_norm": 0.6576237082481384, "learning_rate": 7.403606102635229e-06, "loss": 0.0453, "step": 3754 }, { "epoch": 2.6013162452372707, "grad_norm": 0.6901983022689819, "learning_rate": 7.402912621359224e-06, "loss": 0.0573, "step": 3755 }, { "epoch": 2.6020090058884655, "grad_norm": 0.6307435631752014, "learning_rate": 7.402219140083219e-06, "loss": 0.0479, "step": 3756 }, { "epoch": 2.602701766539661, "grad_norm": 0.5679149627685547, "learning_rate": 7.401525658807213e-06, "loss": 0.038, "step": 3757 }, { "epoch": 2.6033945271908556, "grad_norm": 0.7377214431762695, "learning_rate": 7.400832177531208e-06, "loss": 0.0553, "step": 3758 }, { "epoch": 2.6040872878420505, "grad_norm": 0.6187472939491272, "learning_rate": 7.400138696255201e-06, "loss": 0.041, "step": 3759 }, { "epoch": 2.6047800484932457, "grad_norm": 1.044419527053833, "learning_rate": 7.399445214979196e-06, "loss": 0.0686, "step": 3760 }, { "epoch": 2.6054728091444406, "grad_norm": 0.6558099389076233, "learning_rate": 7.398751733703191e-06, "loss": 0.0551, "step": 3761 }, { "epoch": 2.6061655697956354, "grad_norm": 0.6940314769744873, "learning_rate": 7.398058252427185e-06, "loss": 0.0474, "step": 3762 }, { "epoch": 2.6068583304468307, "grad_norm": 0.6689845323562622, "learning_rate": 7.39736477115118e-06, "loss": 0.0433, "step": 3763 }, { "epoch": 2.6075510910980255, "grad_norm": 0.688528835773468, "learning_rate": 7.396671289875173e-06, "loss": 0.0686, "step": 3764 }, { "epoch": 2.608243851749221, "grad_norm": 0.747090756893158, "learning_rate": 7.395977808599168e-06, "loss": 0.0538, "step": 3765 }, { "epoch": 2.6089366124004156, "grad_norm": 0.6368103623390198, "learning_rate": 7.395284327323163e-06, "loss": 0.0449, "step": 3766 }, { "epoch": 2.609629373051611, "grad_norm": 0.7641220092773438, "learning_rate": 7.394590846047157e-06, "loss": 0.0643, "step": 3767 }, { "epoch": 2.6103221337028057, "grad_norm": 0.7362112402915955, "learning_rate": 7.393897364771152e-06, "loss": 0.0633, "step": 3768 }, { "epoch": 2.6110148943540006, "grad_norm": 0.6080925464630127, "learning_rate": 7.393203883495146e-06, "loss": 0.0476, "step": 3769 }, { "epoch": 2.611707655005196, "grad_norm": 0.6577709913253784, "learning_rate": 7.39251040221914e-06, "loss": 0.047, "step": 3770 }, { "epoch": 2.6124004156563907, "grad_norm": 0.8339235186576843, "learning_rate": 7.391816920943135e-06, "loss": 0.0566, "step": 3771 }, { "epoch": 2.6130931763075855, "grad_norm": 0.6208629012107849, "learning_rate": 7.391123439667129e-06, "loss": 0.0461, "step": 3772 }, { "epoch": 2.613785936958781, "grad_norm": 0.6897596120834351, "learning_rate": 7.390429958391124e-06, "loss": 0.0494, "step": 3773 }, { "epoch": 2.6144786976099756, "grad_norm": 0.6946137547492981, "learning_rate": 7.389736477115118e-06, "loss": 0.049, "step": 3774 }, { "epoch": 2.615171458261171, "grad_norm": 0.6451416015625, "learning_rate": 7.389042995839113e-06, "loss": 0.0481, "step": 3775 }, { "epoch": 2.6158642189123658, "grad_norm": 0.7888498306274414, "learning_rate": 7.388349514563108e-06, "loss": 0.0563, "step": 3776 }, { "epoch": 2.616556979563561, "grad_norm": 0.6911794543266296, "learning_rate": 7.3876560332871015e-06, "loss": 0.0538, "step": 3777 }, { "epoch": 2.617249740214756, "grad_norm": 0.6358610987663269, "learning_rate": 7.386962552011096e-06, "loss": 0.0589, "step": 3778 }, { "epoch": 2.6179425008659507, "grad_norm": 0.6725031137466431, "learning_rate": 7.3862690707350905e-06, "loss": 0.0567, "step": 3779 }, { "epoch": 2.618635261517146, "grad_norm": 0.6633414030075073, "learning_rate": 7.3855755894590854e-06, "loss": 0.0378, "step": 3780 }, { "epoch": 2.619328022168341, "grad_norm": 0.8340132236480713, "learning_rate": 7.38488210818308e-06, "loss": 0.0588, "step": 3781 }, { "epoch": 2.6200207828195357, "grad_norm": 0.6586992144584656, "learning_rate": 7.384188626907074e-06, "loss": 0.0527, "step": 3782 }, { "epoch": 2.620713543470731, "grad_norm": 0.8827303051948547, "learning_rate": 7.3834951456310685e-06, "loss": 0.0596, "step": 3783 }, { "epoch": 2.6214063041219258, "grad_norm": 0.6179441809654236, "learning_rate": 7.382801664355063e-06, "loss": 0.0474, "step": 3784 }, { "epoch": 2.622099064773121, "grad_norm": 0.6691875457763672, "learning_rate": 7.3821081830790576e-06, "loss": 0.0564, "step": 3785 }, { "epoch": 2.622791825424316, "grad_norm": 0.8572561740875244, "learning_rate": 7.3814147018030525e-06, "loss": 0.0599, "step": 3786 }, { "epoch": 2.623484586075511, "grad_norm": 0.5666581392288208, "learning_rate": 7.3807212205270466e-06, "loss": 0.0433, "step": 3787 }, { "epoch": 2.624177346726706, "grad_norm": 0.7028751373291016, "learning_rate": 7.3800277392510415e-06, "loss": 0.0548, "step": 3788 }, { "epoch": 2.624870107377901, "grad_norm": 0.7753036618232727, "learning_rate": 7.379334257975035e-06, "loss": 0.0659, "step": 3789 }, { "epoch": 2.625562868029096, "grad_norm": 0.6794808506965637, "learning_rate": 7.37864077669903e-06, "loss": 0.0572, "step": 3790 }, { "epoch": 2.626255628680291, "grad_norm": 0.6950632333755493, "learning_rate": 7.377947295423025e-06, "loss": 0.0594, "step": 3791 }, { "epoch": 2.626948389331486, "grad_norm": 0.8033772110939026, "learning_rate": 7.377253814147019e-06, "loss": 0.0565, "step": 3792 }, { "epoch": 2.627641149982681, "grad_norm": 0.6774613857269287, "learning_rate": 7.376560332871014e-06, "loss": 0.057, "step": 3793 }, { "epoch": 2.628333910633876, "grad_norm": 0.6674844026565552, "learning_rate": 7.375866851595007e-06, "loss": 0.0519, "step": 3794 }, { "epoch": 2.629026671285071, "grad_norm": 0.603571891784668, "learning_rate": 7.375173370319002e-06, "loss": 0.0446, "step": 3795 }, { "epoch": 2.629719431936266, "grad_norm": 0.7092214226722717, "learning_rate": 7.374479889042997e-06, "loss": 0.0502, "step": 3796 }, { "epoch": 2.6304121925874613, "grad_norm": 0.6835474371910095, "learning_rate": 7.373786407766991e-06, "loss": 0.0511, "step": 3797 }, { "epoch": 2.631104953238656, "grad_norm": 0.670049786567688, "learning_rate": 7.373092926490986e-06, "loss": 0.0556, "step": 3798 }, { "epoch": 2.631797713889851, "grad_norm": 0.7796568274497986, "learning_rate": 7.372399445214979e-06, "loss": 0.071, "step": 3799 }, { "epoch": 2.6324904745410462, "grad_norm": 0.8385136127471924, "learning_rate": 7.371705963938974e-06, "loss": 0.0555, "step": 3800 }, { "epoch": 2.633183235192241, "grad_norm": 0.7091666460037231, "learning_rate": 7.371012482662969e-06, "loss": 0.0554, "step": 3801 }, { "epoch": 2.633875995843436, "grad_norm": 0.7389783263206482, "learning_rate": 7.370319001386963e-06, "loss": 0.0551, "step": 3802 }, { "epoch": 2.634568756494631, "grad_norm": 0.7565779089927673, "learning_rate": 7.369625520110958e-06, "loss": 0.0445, "step": 3803 }, { "epoch": 2.635261517145826, "grad_norm": 0.6586689352989197, "learning_rate": 7.368932038834952e-06, "loss": 0.0493, "step": 3804 }, { "epoch": 2.6359542777970213, "grad_norm": 0.7760875225067139, "learning_rate": 7.368238557558947e-06, "loss": 0.0735, "step": 3805 }, { "epoch": 2.636647038448216, "grad_norm": 0.735569953918457, "learning_rate": 7.367545076282942e-06, "loss": 0.0523, "step": 3806 }, { "epoch": 2.6373397990994114, "grad_norm": 0.6939058303833008, "learning_rate": 7.366851595006935e-06, "loss": 0.051, "step": 3807 }, { "epoch": 2.6380325597506062, "grad_norm": 0.615863025188446, "learning_rate": 7.36615811373093e-06, "loss": 0.0416, "step": 3808 }, { "epoch": 2.638725320401801, "grad_norm": 0.6464378833770752, "learning_rate": 7.365464632454924e-06, "loss": 0.0583, "step": 3809 }, { "epoch": 2.6394180810529964, "grad_norm": 0.7069610357284546, "learning_rate": 7.364771151178919e-06, "loss": 0.0585, "step": 3810 }, { "epoch": 2.640110841704191, "grad_norm": 0.7769689559936523, "learning_rate": 7.364077669902914e-06, "loss": 0.0625, "step": 3811 }, { "epoch": 2.640803602355386, "grad_norm": 0.7485852837562561, "learning_rate": 7.363384188626907e-06, "loss": 0.0539, "step": 3812 }, { "epoch": 2.6414963630065813, "grad_norm": 0.7916412353515625, "learning_rate": 7.362690707350902e-06, "loss": 0.0566, "step": 3813 }, { "epoch": 2.642189123657776, "grad_norm": 0.5960851907730103, "learning_rate": 7.361997226074896e-06, "loss": 0.0473, "step": 3814 }, { "epoch": 2.642881884308971, "grad_norm": 0.658810555934906, "learning_rate": 7.361303744798891e-06, "loss": 0.0491, "step": 3815 }, { "epoch": 2.6435746449601663, "grad_norm": 0.7364745736122131, "learning_rate": 7.360610263522886e-06, "loss": 0.0609, "step": 3816 }, { "epoch": 2.6442674056113615, "grad_norm": 0.688632071018219, "learning_rate": 7.35991678224688e-06, "loss": 0.0604, "step": 3817 }, { "epoch": 2.6449601662625564, "grad_norm": 0.7310763001441956, "learning_rate": 7.359223300970874e-06, "loss": 0.0593, "step": 3818 }, { "epoch": 2.645652926913751, "grad_norm": 0.6695185899734497, "learning_rate": 7.358529819694868e-06, "loss": 0.0491, "step": 3819 }, { "epoch": 2.6463456875649465, "grad_norm": 0.6718903183937073, "learning_rate": 7.357836338418863e-06, "loss": 0.0671, "step": 3820 }, { "epoch": 2.6470384482161413, "grad_norm": 0.6498568654060364, "learning_rate": 7.357142857142858e-06, "loss": 0.0485, "step": 3821 }, { "epoch": 2.647731208867336, "grad_norm": 0.700734555721283, "learning_rate": 7.356449375866852e-06, "loss": 0.0522, "step": 3822 }, { "epoch": 2.6484239695185314, "grad_norm": 0.7129335403442383, "learning_rate": 7.355755894590847e-06, "loss": 0.0512, "step": 3823 }, { "epoch": 2.6491167301697263, "grad_norm": 0.9340489506721497, "learning_rate": 7.3550624133148405e-06, "loss": 0.0557, "step": 3824 }, { "epoch": 2.649809490820921, "grad_norm": 0.6520780324935913, "learning_rate": 7.354368932038835e-06, "loss": 0.0494, "step": 3825 }, { "epoch": 2.6505022514721164, "grad_norm": 0.6372048258781433, "learning_rate": 7.35367545076283e-06, "loss": 0.0467, "step": 3826 }, { "epoch": 2.6511950121233117, "grad_norm": 0.6569356322288513, "learning_rate": 7.3529819694868244e-06, "loss": 0.0486, "step": 3827 }, { "epoch": 2.6518877727745065, "grad_norm": 0.720349133014679, "learning_rate": 7.352288488210819e-06, "loss": 0.053, "step": 3828 }, { "epoch": 2.6525805334257013, "grad_norm": 0.6570484042167664, "learning_rate": 7.351595006934813e-06, "loss": 0.0519, "step": 3829 }, { "epoch": 2.6532732940768966, "grad_norm": 0.7745928168296814, "learning_rate": 7.3509015256588075e-06, "loss": 0.0681, "step": 3830 }, { "epoch": 2.6539660547280914, "grad_norm": 0.6763771176338196, "learning_rate": 7.3502080443828025e-06, "loss": 0.0567, "step": 3831 }, { "epoch": 2.6546588153792863, "grad_norm": 0.8067436814308167, "learning_rate": 7.3495145631067966e-06, "loss": 0.0542, "step": 3832 }, { "epoch": 2.6553515760304816, "grad_norm": 0.7435930967330933, "learning_rate": 7.3488210818307915e-06, "loss": 0.0482, "step": 3833 }, { "epoch": 2.6560443366816764, "grad_norm": 0.8211119174957275, "learning_rate": 7.3481276005547856e-06, "loss": 0.072, "step": 3834 }, { "epoch": 2.6567370973328712, "grad_norm": 0.6172375082969666, "learning_rate": 7.3474341192787805e-06, "loss": 0.0484, "step": 3835 }, { "epoch": 2.6574298579840665, "grad_norm": 0.789193332195282, "learning_rate": 7.3467406380027754e-06, "loss": 0.0478, "step": 3836 }, { "epoch": 2.658122618635262, "grad_norm": 0.8191679120063782, "learning_rate": 7.346047156726769e-06, "loss": 0.0649, "step": 3837 }, { "epoch": 2.6588153792864566, "grad_norm": 1.0866827964782715, "learning_rate": 7.345353675450764e-06, "loss": 0.0748, "step": 3838 }, { "epoch": 2.6595081399376514, "grad_norm": 0.6912830471992493, "learning_rate": 7.344660194174758e-06, "loss": 0.0607, "step": 3839 }, { "epoch": 2.6602009005888467, "grad_norm": 0.7192431688308716, "learning_rate": 7.343966712898753e-06, "loss": 0.042, "step": 3840 }, { "epoch": 2.6608936612400416, "grad_norm": 0.7954387664794922, "learning_rate": 7.3432732316227476e-06, "loss": 0.0649, "step": 3841 }, { "epoch": 2.6615864218912364, "grad_norm": 0.9418224096298218, "learning_rate": 7.342579750346741e-06, "loss": 0.0694, "step": 3842 }, { "epoch": 2.6622791825424317, "grad_norm": 0.7117047309875488, "learning_rate": 7.341886269070736e-06, "loss": 0.0531, "step": 3843 }, { "epoch": 2.6629719431936265, "grad_norm": 0.7617055773735046, "learning_rate": 7.34119278779473e-06, "loss": 0.0633, "step": 3844 }, { "epoch": 2.6636647038448213, "grad_norm": 0.744536280632019, "learning_rate": 7.340499306518725e-06, "loss": 0.0692, "step": 3845 }, { "epoch": 2.6643574644960166, "grad_norm": 0.7266493439674377, "learning_rate": 7.33980582524272e-06, "loss": 0.0625, "step": 3846 }, { "epoch": 2.665050225147212, "grad_norm": 0.5693483948707581, "learning_rate": 7.339112343966713e-06, "loss": 0.0429, "step": 3847 }, { "epoch": 2.6657429857984067, "grad_norm": 0.6484627723693848, "learning_rate": 7.338418862690708e-06, "loss": 0.0508, "step": 3848 }, { "epoch": 2.6664357464496016, "grad_norm": 0.6556325554847717, "learning_rate": 7.337725381414702e-06, "loss": 0.0539, "step": 3849 }, { "epoch": 2.667128507100797, "grad_norm": 0.749572217464447, "learning_rate": 7.337031900138697e-06, "loss": 0.0647, "step": 3850 }, { "epoch": 2.6678212677519917, "grad_norm": 0.7320320010185242, "learning_rate": 7.336338418862692e-06, "loss": 0.0563, "step": 3851 }, { "epoch": 2.6685140284031865, "grad_norm": 0.720440149307251, "learning_rate": 7.335644937586686e-06, "loss": 0.0627, "step": 3852 }, { "epoch": 2.669206789054382, "grad_norm": 0.6776395440101624, "learning_rate": 7.334951456310681e-06, "loss": 0.0525, "step": 3853 }, { "epoch": 2.6698995497055766, "grad_norm": 0.7585930824279785, "learning_rate": 7.334257975034674e-06, "loss": 0.0517, "step": 3854 }, { "epoch": 2.6705923103567715, "grad_norm": 0.8265200853347778, "learning_rate": 7.333564493758669e-06, "loss": 0.0703, "step": 3855 }, { "epoch": 2.6712850710079667, "grad_norm": 0.667201042175293, "learning_rate": 7.332871012482664e-06, "loss": 0.0548, "step": 3856 }, { "epoch": 2.671977831659162, "grad_norm": 0.6452093720436096, "learning_rate": 7.332177531206658e-06, "loss": 0.0458, "step": 3857 }, { "epoch": 2.672670592310357, "grad_norm": 0.7274707555770874, "learning_rate": 7.331484049930653e-06, "loss": 0.0637, "step": 3858 }, { "epoch": 2.6733633529615517, "grad_norm": 0.7111242413520813, "learning_rate": 7.330790568654646e-06, "loss": 0.0634, "step": 3859 }, { "epoch": 2.674056113612747, "grad_norm": 0.679294764995575, "learning_rate": 7.330097087378641e-06, "loss": 0.0484, "step": 3860 }, { "epoch": 2.674748874263942, "grad_norm": 0.7324373722076416, "learning_rate": 7.329403606102636e-06, "loss": 0.0519, "step": 3861 }, { "epoch": 2.6754416349151366, "grad_norm": 0.7865861058235168, "learning_rate": 7.32871012482663e-06, "loss": 0.0564, "step": 3862 }, { "epoch": 2.676134395566332, "grad_norm": 0.6728985905647278, "learning_rate": 7.328016643550625e-06, "loss": 0.0534, "step": 3863 }, { "epoch": 2.6768271562175268, "grad_norm": 0.7406061887741089, "learning_rate": 7.327323162274619e-06, "loss": 0.0635, "step": 3864 }, { "epoch": 2.6775199168687216, "grad_norm": 0.6972936391830444, "learning_rate": 7.326629680998614e-06, "loss": 0.0606, "step": 3865 }, { "epoch": 2.678212677519917, "grad_norm": 0.7474087476730347, "learning_rate": 7.325936199722608e-06, "loss": 0.0585, "step": 3866 }, { "epoch": 2.678905438171112, "grad_norm": 0.618772566318512, "learning_rate": 7.325242718446602e-06, "loss": 0.0513, "step": 3867 }, { "epoch": 2.679598198822307, "grad_norm": 0.6597331762313843, "learning_rate": 7.324549237170597e-06, "loss": 0.0567, "step": 3868 }, { "epoch": 2.680290959473502, "grad_norm": 0.6849602460861206, "learning_rate": 7.323855755894591e-06, "loss": 0.0536, "step": 3869 }, { "epoch": 2.680983720124697, "grad_norm": 0.6464720368385315, "learning_rate": 7.323162274618586e-06, "loss": 0.0486, "step": 3870 }, { "epoch": 2.681676480775892, "grad_norm": 0.7262237071990967, "learning_rate": 7.322468793342581e-06, "loss": 0.053, "step": 3871 }, { "epoch": 2.6823692414270868, "grad_norm": 0.7746634483337402, "learning_rate": 7.321775312066574e-06, "loss": 0.0627, "step": 3872 }, { "epoch": 2.683062002078282, "grad_norm": 0.677756667137146, "learning_rate": 7.321081830790569e-06, "loss": 0.0496, "step": 3873 }, { "epoch": 2.683754762729477, "grad_norm": 0.6235165596008301, "learning_rate": 7.3203883495145634e-06, "loss": 0.0518, "step": 3874 }, { "epoch": 2.6844475233806717, "grad_norm": 0.6622484922409058, "learning_rate": 7.319694868238558e-06, "loss": 0.0535, "step": 3875 }, { "epoch": 2.685140284031867, "grad_norm": 0.7261942028999329, "learning_rate": 7.319001386962553e-06, "loss": 0.0665, "step": 3876 }, { "epoch": 2.6858330446830623, "grad_norm": 0.7278459072113037, "learning_rate": 7.3183079056865465e-06, "loss": 0.0589, "step": 3877 }, { "epoch": 2.686525805334257, "grad_norm": 0.7369149923324585, "learning_rate": 7.3176144244105415e-06, "loss": 0.0549, "step": 3878 }, { "epoch": 2.687218565985452, "grad_norm": 0.7115172147750854, "learning_rate": 7.3169209431345356e-06, "loss": 0.0568, "step": 3879 }, { "epoch": 2.6879113266366472, "grad_norm": 0.8291954398155212, "learning_rate": 7.3162274618585305e-06, "loss": 0.0548, "step": 3880 }, { "epoch": 2.688604087287842, "grad_norm": 0.789232611656189, "learning_rate": 7.315533980582525e-06, "loss": 0.0681, "step": 3881 }, { "epoch": 2.689296847939037, "grad_norm": 0.7152739763259888, "learning_rate": 7.3148404993065195e-06, "loss": 0.0642, "step": 3882 }, { "epoch": 2.689989608590232, "grad_norm": 0.7494675517082214, "learning_rate": 7.3141470180305144e-06, "loss": 0.0537, "step": 3883 }, { "epoch": 2.690682369241427, "grad_norm": 0.6653921604156494, "learning_rate": 7.313453536754508e-06, "loss": 0.045, "step": 3884 }, { "epoch": 2.691375129892622, "grad_norm": 0.6322457790374756, "learning_rate": 7.312760055478503e-06, "loss": 0.0514, "step": 3885 }, { "epoch": 2.692067890543817, "grad_norm": 0.7568194270133972, "learning_rate": 7.3120665742024975e-06, "loss": 0.0674, "step": 3886 }, { "epoch": 2.6927606511950124, "grad_norm": 0.6978754997253418, "learning_rate": 7.311373092926492e-06, "loss": 0.0548, "step": 3887 }, { "epoch": 2.6934534118462072, "grad_norm": 0.7465512752532959, "learning_rate": 7.3106796116504866e-06, "loss": 0.0489, "step": 3888 }, { "epoch": 2.694146172497402, "grad_norm": 0.595215380191803, "learning_rate": 7.30998613037448e-06, "loss": 0.0406, "step": 3889 }, { "epoch": 2.6948389331485973, "grad_norm": 0.6685272455215454, "learning_rate": 7.309292649098475e-06, "loss": 0.0504, "step": 3890 }, { "epoch": 2.695531693799792, "grad_norm": 0.6387081742286682, "learning_rate": 7.30859916782247e-06, "loss": 0.0525, "step": 3891 }, { "epoch": 2.696224454450987, "grad_norm": 0.8346490859985352, "learning_rate": 7.307905686546464e-06, "loss": 0.0656, "step": 3892 }, { "epoch": 2.6969172151021823, "grad_norm": 0.7743673324584961, "learning_rate": 7.307212205270459e-06, "loss": 0.0599, "step": 3893 }, { "epoch": 2.697609975753377, "grad_norm": 0.7080375552177429, "learning_rate": 7.306518723994453e-06, "loss": 0.062, "step": 3894 }, { "epoch": 2.698302736404572, "grad_norm": 0.6892623901367188, "learning_rate": 7.305825242718447e-06, "loss": 0.0543, "step": 3895 }, { "epoch": 2.6989954970557672, "grad_norm": 0.778924822807312, "learning_rate": 7.305131761442442e-06, "loss": 0.0602, "step": 3896 }, { "epoch": 2.699688257706962, "grad_norm": 0.8009116649627686, "learning_rate": 7.304438280166436e-06, "loss": 0.054, "step": 3897 }, { "epoch": 2.7003810183581574, "grad_norm": 0.6834834218025208, "learning_rate": 7.303744798890431e-06, "loss": 0.0482, "step": 3898 }, { "epoch": 2.701073779009352, "grad_norm": 0.7371225357055664, "learning_rate": 7.303051317614425e-06, "loss": 0.0596, "step": 3899 }, { "epoch": 2.7017665396605475, "grad_norm": 0.7083562612533569, "learning_rate": 7.30235783633842e-06, "loss": 0.0626, "step": 3900 }, { "epoch": 2.7024593003117423, "grad_norm": 0.8710654377937317, "learning_rate": 7.301664355062415e-06, "loss": 0.0658, "step": 3901 }, { "epoch": 2.703152060962937, "grad_norm": 0.6961199045181274, "learning_rate": 7.300970873786408e-06, "loss": 0.0531, "step": 3902 }, { "epoch": 2.7038448216141324, "grad_norm": 0.6109960079193115, "learning_rate": 7.300277392510403e-06, "loss": 0.0463, "step": 3903 }, { "epoch": 2.7045375822653273, "grad_norm": 0.7572680115699768, "learning_rate": 7.299583911234397e-06, "loss": 0.0566, "step": 3904 }, { "epoch": 2.705230342916522, "grad_norm": 0.7776378393173218, "learning_rate": 7.298890429958392e-06, "loss": 0.0616, "step": 3905 }, { "epoch": 2.7059231035677174, "grad_norm": 0.7098103165626526, "learning_rate": 7.298196948682387e-06, "loss": 0.0512, "step": 3906 }, { "epoch": 2.706615864218912, "grad_norm": 0.6883637309074402, "learning_rate": 7.29750346740638e-06, "loss": 0.0478, "step": 3907 }, { "epoch": 2.7073086248701075, "grad_norm": 0.6861960291862488, "learning_rate": 7.296809986130375e-06, "loss": 0.0684, "step": 3908 }, { "epoch": 2.7080013855213023, "grad_norm": 0.7326226234436035, "learning_rate": 7.296116504854369e-06, "loss": 0.0536, "step": 3909 }, { "epoch": 2.7086941461724976, "grad_norm": 0.721124529838562, "learning_rate": 7.295423023578364e-06, "loss": 0.0402, "step": 3910 }, { "epoch": 2.7093869068236924, "grad_norm": 0.6418149471282959, "learning_rate": 7.294729542302359e-06, "loss": 0.0554, "step": 3911 }, { "epoch": 2.7100796674748873, "grad_norm": 0.7129888534545898, "learning_rate": 7.294036061026353e-06, "loss": 0.0496, "step": 3912 }, { "epoch": 2.7107724281260825, "grad_norm": 0.6219625473022461, "learning_rate": 7.293342579750348e-06, "loss": 0.0419, "step": 3913 }, { "epoch": 2.7114651887772774, "grad_norm": 0.6785973906517029, "learning_rate": 7.292649098474341e-06, "loss": 0.051, "step": 3914 }, { "epoch": 2.712157949428472, "grad_norm": 0.7339017987251282, "learning_rate": 7.291955617198336e-06, "loss": 0.0567, "step": 3915 }, { "epoch": 2.7128507100796675, "grad_norm": 0.7542696595191956, "learning_rate": 7.291262135922331e-06, "loss": 0.068, "step": 3916 }, { "epoch": 2.7135434707308623, "grad_norm": 0.8791110515594482, "learning_rate": 7.290568654646325e-06, "loss": 0.0545, "step": 3917 }, { "epoch": 2.7142362313820576, "grad_norm": 0.6041953563690186, "learning_rate": 7.28987517337032e-06, "loss": 0.0511, "step": 3918 }, { "epoch": 2.7149289920332524, "grad_norm": 0.7498956322669983, "learning_rate": 7.289181692094313e-06, "loss": 0.0525, "step": 3919 }, { "epoch": 2.7156217526844477, "grad_norm": 0.6190578937530518, "learning_rate": 7.288488210818308e-06, "loss": 0.038, "step": 3920 }, { "epoch": 2.7163145133356426, "grad_norm": 0.7730886936187744, "learning_rate": 7.287794729542303e-06, "loss": 0.0508, "step": 3921 }, { "epoch": 2.7170072739868374, "grad_norm": 0.7318882942199707, "learning_rate": 7.287101248266297e-06, "loss": 0.0568, "step": 3922 }, { "epoch": 2.7177000346380327, "grad_norm": 0.834860622882843, "learning_rate": 7.286407766990292e-06, "loss": 0.0659, "step": 3923 }, { "epoch": 2.7183927952892275, "grad_norm": 0.7603409886360168, "learning_rate": 7.285714285714286e-06, "loss": 0.065, "step": 3924 }, { "epoch": 2.7190855559404223, "grad_norm": 0.6554049253463745, "learning_rate": 7.2850208044382805e-06, "loss": 0.058, "step": 3925 }, { "epoch": 2.7197783165916176, "grad_norm": 1.1610909700393677, "learning_rate": 7.284327323162275e-06, "loss": 0.0726, "step": 3926 }, { "epoch": 2.7204710772428125, "grad_norm": 0.6697677969932556, "learning_rate": 7.2836338418862695e-06, "loss": 0.0585, "step": 3927 }, { "epoch": 2.7211638378940077, "grad_norm": 0.6267381906509399, "learning_rate": 7.282940360610264e-06, "loss": 0.0466, "step": 3928 }, { "epoch": 2.7218565985452026, "grad_norm": 0.6425818800926208, "learning_rate": 7.2822468793342585e-06, "loss": 0.0478, "step": 3929 }, { "epoch": 2.722549359196398, "grad_norm": 0.7394554018974304, "learning_rate": 7.2815533980582534e-06, "loss": 0.0587, "step": 3930 }, { "epoch": 2.7232421198475927, "grad_norm": 0.7270798087120056, "learning_rate": 7.280859916782248e-06, "loss": 0.0564, "step": 3931 }, { "epoch": 2.7239348804987875, "grad_norm": 0.7313693761825562, "learning_rate": 7.280166435506242e-06, "loss": 0.0574, "step": 3932 }, { "epoch": 2.724627641149983, "grad_norm": 0.7176659107208252, "learning_rate": 7.2794729542302365e-06, "loss": 0.0565, "step": 3933 }, { "epoch": 2.7253204018011776, "grad_norm": 0.6980408430099487, "learning_rate": 7.278779472954231e-06, "loss": 0.0502, "step": 3934 }, { "epoch": 2.7260131624523725, "grad_norm": 0.7114086151123047, "learning_rate": 7.2780859916782256e-06, "loss": 0.0592, "step": 3935 }, { "epoch": 2.7267059231035677, "grad_norm": 0.6872692108154297, "learning_rate": 7.2773925104022205e-06, "loss": 0.0474, "step": 3936 }, { "epoch": 2.7273986837547626, "grad_norm": 0.6300058960914612, "learning_rate": 7.276699029126214e-06, "loss": 0.046, "step": 3937 }, { "epoch": 2.728091444405958, "grad_norm": 0.6446839570999146, "learning_rate": 7.276005547850209e-06, "loss": 0.0503, "step": 3938 }, { "epoch": 2.7287842050571527, "grad_norm": 0.5818908214569092, "learning_rate": 7.275312066574203e-06, "loss": 0.0428, "step": 3939 }, { "epoch": 2.729476965708348, "grad_norm": 0.7795031070709229, "learning_rate": 7.274618585298198e-06, "loss": 0.0592, "step": 3940 }, { "epoch": 2.730169726359543, "grad_norm": 0.6601243615150452, "learning_rate": 7.273925104022193e-06, "loss": 0.0529, "step": 3941 }, { "epoch": 2.7308624870107376, "grad_norm": 0.7666286826133728, "learning_rate": 7.273231622746187e-06, "loss": 0.0637, "step": 3942 }, { "epoch": 2.731555247661933, "grad_norm": 0.7480244040489197, "learning_rate": 7.272538141470181e-06, "loss": 0.062, "step": 3943 }, { "epoch": 2.7322480083131278, "grad_norm": 0.7202300429344177, "learning_rate": 7.271844660194175e-06, "loss": 0.0521, "step": 3944 }, { "epoch": 2.7329407689643226, "grad_norm": 0.7029700875282288, "learning_rate": 7.27115117891817e-06, "loss": 0.0648, "step": 3945 }, { "epoch": 2.733633529615518, "grad_norm": 0.7759830951690674, "learning_rate": 7.270457697642165e-06, "loss": 0.0564, "step": 3946 }, { "epoch": 2.7343262902667127, "grad_norm": 0.7026242017745972, "learning_rate": 7.269764216366159e-06, "loss": 0.0591, "step": 3947 }, { "epoch": 2.735019050917908, "grad_norm": 0.7203380465507507, "learning_rate": 7.269070735090154e-06, "loss": 0.0581, "step": 3948 }, { "epoch": 2.735711811569103, "grad_norm": 0.7329349517822266, "learning_rate": 7.268377253814147e-06, "loss": 0.0555, "step": 3949 }, { "epoch": 2.736404572220298, "grad_norm": 0.8174529075622559, "learning_rate": 7.267683772538142e-06, "loss": 0.0607, "step": 3950 }, { "epoch": 2.737097332871493, "grad_norm": 0.6682785153388977, "learning_rate": 7.266990291262137e-06, "loss": 0.0461, "step": 3951 }, { "epoch": 2.7377900935226878, "grad_norm": 0.7733043432235718, "learning_rate": 7.266296809986131e-06, "loss": 0.048, "step": 3952 }, { "epoch": 2.738482854173883, "grad_norm": 0.8042845726013184, "learning_rate": 7.265603328710126e-06, "loss": 0.0594, "step": 3953 }, { "epoch": 2.739175614825078, "grad_norm": 0.688261091709137, "learning_rate": 7.264909847434119e-06, "loss": 0.0609, "step": 3954 }, { "epoch": 2.7398683754762727, "grad_norm": 1.2057961225509644, "learning_rate": 7.264216366158114e-06, "loss": 0.0676, "step": 3955 }, { "epoch": 2.740561136127468, "grad_norm": 0.7686060070991516, "learning_rate": 7.263522884882109e-06, "loss": 0.07, "step": 3956 }, { "epoch": 2.741253896778663, "grad_norm": 0.76099693775177, "learning_rate": 7.262829403606103e-06, "loss": 0.0537, "step": 3957 }, { "epoch": 2.741946657429858, "grad_norm": 0.7034679651260376, "learning_rate": 7.262135922330098e-06, "loss": 0.0568, "step": 3958 }, { "epoch": 2.742639418081053, "grad_norm": 0.7430107593536377, "learning_rate": 7.261442441054092e-06, "loss": 0.0611, "step": 3959 }, { "epoch": 2.743332178732248, "grad_norm": 0.5567717552185059, "learning_rate": 7.260748959778087e-06, "loss": 0.0499, "step": 3960 }, { "epoch": 2.744024939383443, "grad_norm": 0.7748571038246155, "learning_rate": 7.260055478502082e-06, "loss": 0.0642, "step": 3961 }, { "epoch": 2.744717700034638, "grad_norm": 0.8475006818771362, "learning_rate": 7.259361997226075e-06, "loss": 0.0697, "step": 3962 }, { "epoch": 2.745410460685833, "grad_norm": 0.6778804659843445, "learning_rate": 7.25866851595007e-06, "loss": 0.0609, "step": 3963 }, { "epoch": 2.746103221337028, "grad_norm": 0.6167569160461426, "learning_rate": 7.257975034674064e-06, "loss": 0.0534, "step": 3964 }, { "epoch": 2.746795981988223, "grad_norm": 0.8046923875808716, "learning_rate": 7.257281553398059e-06, "loss": 0.053, "step": 3965 }, { "epoch": 2.747488742639418, "grad_norm": 0.8193111419677734, "learning_rate": 7.256588072122054e-06, "loss": 0.0662, "step": 3966 }, { "epoch": 2.748181503290613, "grad_norm": 0.8219735622406006, "learning_rate": 7.255894590846047e-06, "loss": 0.0715, "step": 3967 }, { "epoch": 2.7488742639418082, "grad_norm": 0.7069618701934814, "learning_rate": 7.255201109570042e-06, "loss": 0.064, "step": 3968 }, { "epoch": 2.749567024593003, "grad_norm": 0.7169185876846313, "learning_rate": 7.254507628294036e-06, "loss": 0.0652, "step": 3969 }, { "epoch": 2.7502597852441983, "grad_norm": 0.7799632549285889, "learning_rate": 7.253814147018031e-06, "loss": 0.0532, "step": 3970 }, { "epoch": 2.750952545895393, "grad_norm": 0.7554654479026794, "learning_rate": 7.253120665742026e-06, "loss": 0.0771, "step": 3971 }, { "epoch": 2.751645306546588, "grad_norm": 0.8206004500389099, "learning_rate": 7.25242718446602e-06, "loss": 0.066, "step": 3972 }, { "epoch": 2.7523380671977833, "grad_norm": 0.8165253400802612, "learning_rate": 7.251733703190014e-06, "loss": 0.0512, "step": 3973 }, { "epoch": 2.753030827848978, "grad_norm": 0.7096630930900574, "learning_rate": 7.2510402219140085e-06, "loss": 0.0572, "step": 3974 }, { "epoch": 2.753723588500173, "grad_norm": 0.6919059157371521, "learning_rate": 7.250346740638003e-06, "loss": 0.0524, "step": 3975 }, { "epoch": 2.7544163491513682, "grad_norm": 0.6617465019226074, "learning_rate": 7.2496532593619975e-06, "loss": 0.0491, "step": 3976 }, { "epoch": 2.755109109802563, "grad_norm": 0.6623074412345886, "learning_rate": 7.2489597780859924e-06, "loss": 0.0614, "step": 3977 }, { "epoch": 2.7558018704537584, "grad_norm": 0.6906787157058716, "learning_rate": 7.248266296809987e-06, "loss": 0.061, "step": 3978 }, { "epoch": 2.756494631104953, "grad_norm": 0.6674212217330933, "learning_rate": 7.247572815533981e-06, "loss": 0.0496, "step": 3979 }, { "epoch": 2.7571873917561485, "grad_norm": 0.6485714316368103, "learning_rate": 7.2468793342579755e-06, "loss": 0.0574, "step": 3980 }, { "epoch": 2.7578801524073433, "grad_norm": 0.693846583366394, "learning_rate": 7.24618585298197e-06, "loss": 0.0425, "step": 3981 }, { "epoch": 2.758572913058538, "grad_norm": 0.8718501329421997, "learning_rate": 7.2454923717059646e-06, "loss": 0.0617, "step": 3982 }, { "epoch": 2.7592656737097334, "grad_norm": 0.7123166918754578, "learning_rate": 7.2447988904299595e-06, "loss": 0.0587, "step": 3983 }, { "epoch": 2.7599584343609282, "grad_norm": 0.6755354404449463, "learning_rate": 7.244105409153953e-06, "loss": 0.0509, "step": 3984 }, { "epoch": 2.760651195012123, "grad_norm": 0.6938267350196838, "learning_rate": 7.243411927877948e-06, "loss": 0.0579, "step": 3985 }, { "epoch": 2.7613439556633184, "grad_norm": 0.6104715466499329, "learning_rate": 7.242718446601942e-06, "loss": 0.0477, "step": 3986 }, { "epoch": 2.762036716314513, "grad_norm": 0.760593831539154, "learning_rate": 7.242024965325937e-06, "loss": 0.0579, "step": 3987 }, { "epoch": 2.7627294769657085, "grad_norm": 0.831852376461029, "learning_rate": 7.241331484049932e-06, "loss": 0.0718, "step": 3988 }, { "epoch": 2.7634222376169033, "grad_norm": 0.7221866250038147, "learning_rate": 7.240638002773926e-06, "loss": 0.0539, "step": 3989 }, { "epoch": 2.7641149982680986, "grad_norm": 0.6120632290840149, "learning_rate": 7.239944521497921e-06, "loss": 0.0601, "step": 3990 }, { "epoch": 2.7648077589192934, "grad_norm": 0.8550262451171875, "learning_rate": 7.239251040221914e-06, "loss": 0.0688, "step": 3991 }, { "epoch": 2.7655005195704883, "grad_norm": 0.7031256556510925, "learning_rate": 7.238557558945909e-06, "loss": 0.0623, "step": 3992 }, { "epoch": 2.7661932802216835, "grad_norm": 0.677352786064148, "learning_rate": 7.237864077669904e-06, "loss": 0.051, "step": 3993 }, { "epoch": 2.7668860408728784, "grad_norm": 0.6129022240638733, "learning_rate": 7.237170596393898e-06, "loss": 0.0472, "step": 3994 }, { "epoch": 2.767578801524073, "grad_norm": 0.6582059264183044, "learning_rate": 7.236477115117893e-06, "loss": 0.0391, "step": 3995 }, { "epoch": 2.7682715621752685, "grad_norm": 0.7357588410377502, "learning_rate": 7.235783633841886e-06, "loss": 0.0633, "step": 3996 }, { "epoch": 2.7689643228264633, "grad_norm": 0.7520215511322021, "learning_rate": 7.235090152565881e-06, "loss": 0.0528, "step": 3997 }, { "epoch": 2.7696570834776586, "grad_norm": 0.7143958806991577, "learning_rate": 7.234396671289876e-06, "loss": 0.0611, "step": 3998 }, { "epoch": 2.7703498441288534, "grad_norm": 0.6682828068733215, "learning_rate": 7.23370319001387e-06, "loss": 0.0595, "step": 3999 }, { "epoch": 2.7710426047800487, "grad_norm": 0.6994439959526062, "learning_rate": 7.233009708737865e-06, "loss": 0.0688, "step": 4000 }, { "epoch": 2.7717353654312435, "grad_norm": 0.7368742227554321, "learning_rate": 7.232316227461859e-06, "loss": 0.0635, "step": 4001 }, { "epoch": 2.7724281260824384, "grad_norm": 0.6693440675735474, "learning_rate": 7.231622746185853e-06, "loss": 0.0564, "step": 4002 }, { "epoch": 2.7731208867336337, "grad_norm": 0.623653769493103, "learning_rate": 7.230929264909848e-06, "loss": 0.0419, "step": 4003 }, { "epoch": 2.7738136473848285, "grad_norm": 0.7218508124351501, "learning_rate": 7.230235783633842e-06, "loss": 0.0616, "step": 4004 }, { "epoch": 2.7745064080360233, "grad_norm": 0.8679817914962769, "learning_rate": 7.229542302357837e-06, "loss": 0.0567, "step": 4005 }, { "epoch": 2.7751991686872186, "grad_norm": 0.7767743468284607, "learning_rate": 7.228848821081831e-06, "loss": 0.0615, "step": 4006 }, { "epoch": 2.7758919293384134, "grad_norm": 0.6840187907218933, "learning_rate": 7.228155339805826e-06, "loss": 0.0525, "step": 4007 }, { "epoch": 2.7765846899896087, "grad_norm": 0.6501522064208984, "learning_rate": 7.227461858529821e-06, "loss": 0.0478, "step": 4008 }, { "epoch": 2.7772774506408036, "grad_norm": 0.6367305517196655, "learning_rate": 7.226768377253814e-06, "loss": 0.0496, "step": 4009 }, { "epoch": 2.777970211291999, "grad_norm": 0.765961766242981, "learning_rate": 7.226074895977809e-06, "loss": 0.0549, "step": 4010 }, { "epoch": 2.7786629719431937, "grad_norm": 0.7556353211402893, "learning_rate": 7.225381414701803e-06, "loss": 0.0597, "step": 4011 }, { "epoch": 2.7793557325943885, "grad_norm": 0.674601137638092, "learning_rate": 7.224687933425798e-06, "loss": 0.0559, "step": 4012 }, { "epoch": 2.780048493245584, "grad_norm": 0.7502955198287964, "learning_rate": 7.223994452149793e-06, "loss": 0.0544, "step": 4013 }, { "epoch": 2.7807412538967786, "grad_norm": 0.7685797214508057, "learning_rate": 7.223300970873786e-06, "loss": 0.0496, "step": 4014 }, { "epoch": 2.7814340145479735, "grad_norm": 0.6448591351509094, "learning_rate": 7.222607489597781e-06, "loss": 0.0434, "step": 4015 }, { "epoch": 2.7821267751991687, "grad_norm": 0.5939489603042603, "learning_rate": 7.221914008321775e-06, "loss": 0.0425, "step": 4016 }, { "epoch": 2.7828195358503636, "grad_norm": 0.6740111112594604, "learning_rate": 7.22122052704577e-06, "loss": 0.0593, "step": 4017 }, { "epoch": 2.783512296501559, "grad_norm": 0.7750644087791443, "learning_rate": 7.220527045769765e-06, "loss": 0.0575, "step": 4018 }, { "epoch": 2.7842050571527537, "grad_norm": 0.6008273363113403, "learning_rate": 7.219833564493759e-06, "loss": 0.0524, "step": 4019 }, { "epoch": 2.784897817803949, "grad_norm": 0.6259392499923706, "learning_rate": 7.219140083217754e-06, "loss": 0.0407, "step": 4020 }, { "epoch": 2.785590578455144, "grad_norm": 0.6633020043373108, "learning_rate": 7.2184466019417475e-06, "loss": 0.0535, "step": 4021 }, { "epoch": 2.7862833391063386, "grad_norm": 0.7504967451095581, "learning_rate": 7.217753120665742e-06, "loss": 0.0554, "step": 4022 }, { "epoch": 2.786976099757534, "grad_norm": 0.5940077304840088, "learning_rate": 7.217059639389737e-06, "loss": 0.0445, "step": 4023 }, { "epoch": 2.7876688604087287, "grad_norm": 0.7825884819030762, "learning_rate": 7.2163661581137314e-06, "loss": 0.0592, "step": 4024 }, { "epoch": 2.7883616210599236, "grad_norm": 0.6419134736061096, "learning_rate": 7.215672676837726e-06, "loss": 0.0541, "step": 4025 }, { "epoch": 2.789054381711119, "grad_norm": 0.7044313549995422, "learning_rate": 7.21497919556172e-06, "loss": 0.0621, "step": 4026 }, { "epoch": 2.7897471423623137, "grad_norm": 1.0729942321777344, "learning_rate": 7.2142857142857145e-06, "loss": 0.0581, "step": 4027 }, { "epoch": 2.790439903013509, "grad_norm": 0.6810662150382996, "learning_rate": 7.2135922330097095e-06, "loss": 0.0599, "step": 4028 }, { "epoch": 2.791132663664704, "grad_norm": 0.6379590034484863, "learning_rate": 7.2128987517337036e-06, "loss": 0.0476, "step": 4029 }, { "epoch": 2.791825424315899, "grad_norm": 0.7135487794876099, "learning_rate": 7.2122052704576985e-06, "loss": 0.0505, "step": 4030 }, { "epoch": 2.792518184967094, "grad_norm": 0.7583690285682678, "learning_rate": 7.211511789181692e-06, "loss": 0.0623, "step": 4031 }, { "epoch": 2.7932109456182888, "grad_norm": 0.6712807416915894, "learning_rate": 7.210818307905687e-06, "loss": 0.0471, "step": 4032 }, { "epoch": 2.793903706269484, "grad_norm": 0.7846777439117432, "learning_rate": 7.210124826629682e-06, "loss": 0.0534, "step": 4033 }, { "epoch": 2.794596466920679, "grad_norm": 0.6510107517242432, "learning_rate": 7.209431345353676e-06, "loss": 0.0516, "step": 4034 }, { "epoch": 2.7952892275718737, "grad_norm": 0.8907626271247864, "learning_rate": 7.208737864077671e-06, "loss": 0.0721, "step": 4035 }, { "epoch": 2.795981988223069, "grad_norm": 0.8742437362670898, "learning_rate": 7.208044382801665e-06, "loss": 0.0606, "step": 4036 }, { "epoch": 2.796674748874264, "grad_norm": 0.706000030040741, "learning_rate": 7.20735090152566e-06, "loss": 0.0421, "step": 4037 }, { "epoch": 2.797367509525459, "grad_norm": 0.6947118043899536, "learning_rate": 7.206657420249655e-06, "loss": 0.0563, "step": 4038 }, { "epoch": 2.798060270176654, "grad_norm": 0.7536004185676575, "learning_rate": 7.205963938973648e-06, "loss": 0.051, "step": 4039 }, { "epoch": 2.798753030827849, "grad_norm": 0.6250074505805969, "learning_rate": 7.205270457697643e-06, "loss": 0.0512, "step": 4040 }, { "epoch": 2.799445791479044, "grad_norm": 0.7176235914230347, "learning_rate": 7.204576976421637e-06, "loss": 0.0575, "step": 4041 }, { "epoch": 2.800138552130239, "grad_norm": 0.7992220520973206, "learning_rate": 7.203883495145632e-06, "loss": 0.063, "step": 4042 }, { "epoch": 2.800831312781434, "grad_norm": 0.732123613357544, "learning_rate": 7.203190013869627e-06, "loss": 0.0542, "step": 4043 }, { "epoch": 2.801524073432629, "grad_norm": 0.6620665192604065, "learning_rate": 7.20249653259362e-06, "loss": 0.0523, "step": 4044 }, { "epoch": 2.802216834083824, "grad_norm": 0.675926148891449, "learning_rate": 7.201803051317615e-06, "loss": 0.0545, "step": 4045 }, { "epoch": 2.802909594735019, "grad_norm": 0.7969706654548645, "learning_rate": 7.201109570041609e-06, "loss": 0.0489, "step": 4046 }, { "epoch": 2.803602355386214, "grad_norm": 0.6930369734764099, "learning_rate": 7.200416088765604e-06, "loss": 0.055, "step": 4047 }, { "epoch": 2.804295116037409, "grad_norm": 0.6895542740821838, "learning_rate": 7.199722607489599e-06, "loss": 0.0571, "step": 4048 }, { "epoch": 2.804987876688604, "grad_norm": 0.7721331715583801, "learning_rate": 7.199029126213593e-06, "loss": 0.0664, "step": 4049 }, { "epoch": 2.8056806373397993, "grad_norm": 0.7242488265037537, "learning_rate": 7.198335644937587e-06, "loss": 0.0626, "step": 4050 }, { "epoch": 2.806373397990994, "grad_norm": 0.6718471646308899, "learning_rate": 7.197642163661581e-06, "loss": 0.0486, "step": 4051 }, { "epoch": 2.807066158642189, "grad_norm": 0.7310835719108582, "learning_rate": 7.196948682385576e-06, "loss": 0.0632, "step": 4052 }, { "epoch": 2.8077589192933843, "grad_norm": 0.6099986433982849, "learning_rate": 7.196255201109571e-06, "loss": 0.046, "step": 4053 }, { "epoch": 2.808451679944579, "grad_norm": 0.7324040532112122, "learning_rate": 7.195561719833565e-06, "loss": 0.0593, "step": 4054 }, { "epoch": 2.809144440595774, "grad_norm": 0.8426403403282166, "learning_rate": 7.19486823855756e-06, "loss": 0.0699, "step": 4055 }, { "epoch": 2.8098372012469692, "grad_norm": 0.7356042265892029, "learning_rate": 7.194174757281553e-06, "loss": 0.0489, "step": 4056 }, { "epoch": 2.810529961898164, "grad_norm": 0.7246345281600952, "learning_rate": 7.193481276005548e-06, "loss": 0.0632, "step": 4057 }, { "epoch": 2.8112227225493593, "grad_norm": 0.7151074409484863, "learning_rate": 7.192787794729543e-06, "loss": 0.0439, "step": 4058 }, { "epoch": 2.811915483200554, "grad_norm": 0.7404351830482483, "learning_rate": 7.192094313453537e-06, "loss": 0.0549, "step": 4059 }, { "epoch": 2.8126082438517495, "grad_norm": 0.8077854514122009, "learning_rate": 7.191400832177532e-06, "loss": 0.0478, "step": 4060 }, { "epoch": 2.8133010045029443, "grad_norm": 0.6892199516296387, "learning_rate": 7.190707350901525e-06, "loss": 0.0573, "step": 4061 }, { "epoch": 2.813993765154139, "grad_norm": 0.6757320761680603, "learning_rate": 7.19001386962552e-06, "loss": 0.0497, "step": 4062 }, { "epoch": 2.8146865258053344, "grad_norm": 0.717888593673706, "learning_rate": 7.189320388349515e-06, "loss": 0.0614, "step": 4063 }, { "epoch": 2.8153792864565292, "grad_norm": 0.6951326131820679, "learning_rate": 7.188626907073509e-06, "loss": 0.0524, "step": 4064 }, { "epoch": 2.816072047107724, "grad_norm": 0.7384958267211914, "learning_rate": 7.187933425797504e-06, "loss": 0.0414, "step": 4065 }, { "epoch": 2.8167648077589194, "grad_norm": 0.7302805781364441, "learning_rate": 7.187239944521498e-06, "loss": 0.0508, "step": 4066 }, { "epoch": 2.817457568410114, "grad_norm": 0.7409061193466187, "learning_rate": 7.186546463245493e-06, "loss": 0.0581, "step": 4067 }, { "epoch": 2.8181503290613095, "grad_norm": 0.6641678214073181, "learning_rate": 7.185852981969488e-06, "loss": 0.0523, "step": 4068 }, { "epoch": 2.8188430897125043, "grad_norm": 0.6693853139877319, "learning_rate": 7.1851595006934814e-06, "loss": 0.0458, "step": 4069 }, { "epoch": 2.8195358503636996, "grad_norm": 0.7115046381950378, "learning_rate": 7.184466019417476e-06, "loss": 0.0558, "step": 4070 }, { "epoch": 2.8202286110148944, "grad_norm": 0.6776880025863647, "learning_rate": 7.1837725381414704e-06, "loss": 0.0532, "step": 4071 }, { "epoch": 2.8209213716660893, "grad_norm": 0.72972172498703, "learning_rate": 7.183079056865465e-06, "loss": 0.0482, "step": 4072 }, { "epoch": 2.8216141323172845, "grad_norm": 0.6619596481323242, "learning_rate": 7.18238557558946e-06, "loss": 0.0505, "step": 4073 }, { "epoch": 2.8223068929684794, "grad_norm": 0.7599324584007263, "learning_rate": 7.1816920943134535e-06, "loss": 0.0624, "step": 4074 }, { "epoch": 2.822999653619674, "grad_norm": 0.7475978136062622, "learning_rate": 7.1809986130374485e-06, "loss": 0.0666, "step": 4075 }, { "epoch": 2.8236924142708695, "grad_norm": 0.7838507294654846, "learning_rate": 7.1803051317614426e-06, "loss": 0.0568, "step": 4076 }, { "epoch": 2.8243851749220643, "grad_norm": 0.7802300453186035, "learning_rate": 7.1796116504854375e-06, "loss": 0.0501, "step": 4077 }, { "epoch": 2.8250779355732596, "grad_norm": 0.6380999088287354, "learning_rate": 7.1789181692094324e-06, "loss": 0.0526, "step": 4078 }, { "epoch": 2.8257706962244544, "grad_norm": 0.7001136541366577, "learning_rate": 7.178224687933426e-06, "loss": 0.0609, "step": 4079 }, { "epoch": 2.8264634568756497, "grad_norm": 0.8101133108139038, "learning_rate": 7.177531206657421e-06, "loss": 0.0673, "step": 4080 }, { "epoch": 2.8271562175268445, "grad_norm": 0.757917046546936, "learning_rate": 7.176837725381415e-06, "loss": 0.0684, "step": 4081 }, { "epoch": 2.8278489781780394, "grad_norm": 0.7094997763633728, "learning_rate": 7.17614424410541e-06, "loss": 0.0559, "step": 4082 }, { "epoch": 2.8285417388292347, "grad_norm": 0.7780656814575195, "learning_rate": 7.1754507628294046e-06, "loss": 0.0693, "step": 4083 }, { "epoch": 2.8292344994804295, "grad_norm": 0.8230788707733154, "learning_rate": 7.174757281553399e-06, "loss": 0.0599, "step": 4084 }, { "epoch": 2.8299272601316243, "grad_norm": 0.8302596807479858, "learning_rate": 7.174063800277394e-06, "loss": 0.0583, "step": 4085 }, { "epoch": 2.8306200207828196, "grad_norm": 0.6846452951431274, "learning_rate": 7.173370319001387e-06, "loss": 0.057, "step": 4086 }, { "epoch": 2.8313127814340144, "grad_norm": 0.7271231412887573, "learning_rate": 7.172676837725382e-06, "loss": 0.0531, "step": 4087 }, { "epoch": 2.8320055420852097, "grad_norm": 0.7599466443061829, "learning_rate": 7.171983356449377e-06, "loss": 0.0478, "step": 4088 }, { "epoch": 2.8326983027364045, "grad_norm": 0.682426929473877, "learning_rate": 7.171289875173371e-06, "loss": 0.0432, "step": 4089 }, { "epoch": 2.8333910633876, "grad_norm": 0.7017044425010681, "learning_rate": 7.170596393897366e-06, "loss": 0.0432, "step": 4090 }, { "epoch": 2.8340838240387947, "grad_norm": 0.7831794023513794, "learning_rate": 7.169902912621359e-06, "loss": 0.0616, "step": 4091 }, { "epoch": 2.8347765846899895, "grad_norm": 0.6838694214820862, "learning_rate": 7.169209431345354e-06, "loss": 0.0526, "step": 4092 }, { "epoch": 2.8354693453411848, "grad_norm": 0.7550931572914124, "learning_rate": 7.168515950069349e-06, "loss": 0.0628, "step": 4093 }, { "epoch": 2.8361621059923796, "grad_norm": 0.7070609331130981, "learning_rate": 7.167822468793343e-06, "loss": 0.0441, "step": 4094 }, { "epoch": 2.8368548666435744, "grad_norm": 0.6957732439041138, "learning_rate": 7.167128987517338e-06, "loss": 0.0492, "step": 4095 }, { "epoch": 2.8375476272947697, "grad_norm": 0.7564902305603027, "learning_rate": 7.166435506241332e-06, "loss": 0.0516, "step": 4096 }, { "epoch": 2.8382403879459646, "grad_norm": 0.7061901688575745, "learning_rate": 7.165742024965327e-06, "loss": 0.0496, "step": 4097 }, { "epoch": 2.83893314859716, "grad_norm": 0.6750578880310059, "learning_rate": 7.165048543689321e-06, "loss": 0.0515, "step": 4098 }, { "epoch": 2.8396259092483547, "grad_norm": 0.5834484100341797, "learning_rate": 7.164355062413315e-06, "loss": 0.054, "step": 4099 }, { "epoch": 2.84031866989955, "grad_norm": 0.700020432472229, "learning_rate": 7.16366158113731e-06, "loss": 0.0507, "step": 4100 }, { "epoch": 2.841011430550745, "grad_norm": 0.8128746151924133, "learning_rate": 7.162968099861304e-06, "loss": 0.0525, "step": 4101 }, { "epoch": 2.8417041912019396, "grad_norm": 0.7868772745132446, "learning_rate": 7.162274618585299e-06, "loss": 0.0482, "step": 4102 }, { "epoch": 2.842396951853135, "grad_norm": 0.7407437562942505, "learning_rate": 7.161581137309294e-06, "loss": 0.0605, "step": 4103 }, { "epoch": 2.8430897125043297, "grad_norm": 0.6782182455062866, "learning_rate": 7.160887656033287e-06, "loss": 0.05, "step": 4104 }, { "epoch": 2.8437824731555246, "grad_norm": 0.6876909732818604, "learning_rate": 7.160194174757282e-06, "loss": 0.0498, "step": 4105 }, { "epoch": 2.84447523380672, "grad_norm": 0.6600090861320496, "learning_rate": 7.159500693481276e-06, "loss": 0.0508, "step": 4106 }, { "epoch": 2.8451679944579147, "grad_norm": 0.7737667560577393, "learning_rate": 7.158807212205271e-06, "loss": 0.0566, "step": 4107 }, { "epoch": 2.84586075510911, "grad_norm": 0.734607994556427, "learning_rate": 7.158113730929266e-06, "loss": 0.0527, "step": 4108 }, { "epoch": 2.846553515760305, "grad_norm": 0.6881810426712036, "learning_rate": 7.157420249653259e-06, "loss": 0.0548, "step": 4109 }, { "epoch": 2.8472462764115, "grad_norm": 0.7521769404411316, "learning_rate": 7.156726768377254e-06, "loss": 0.0483, "step": 4110 }, { "epoch": 2.847939037062695, "grad_norm": 0.7764769792556763, "learning_rate": 7.156033287101248e-06, "loss": 0.0694, "step": 4111 }, { "epoch": 2.8486317977138897, "grad_norm": 0.9802010655403137, "learning_rate": 7.155339805825243e-06, "loss": 0.0616, "step": 4112 }, { "epoch": 2.849324558365085, "grad_norm": 0.6978768706321716, "learning_rate": 7.154646324549238e-06, "loss": 0.0486, "step": 4113 }, { "epoch": 2.85001731901628, "grad_norm": 0.6799197793006897, "learning_rate": 7.153952843273232e-06, "loss": 0.053, "step": 4114 }, { "epoch": 2.8507100796674747, "grad_norm": 0.7332054972648621, "learning_rate": 7.153259361997227e-06, "loss": 0.0617, "step": 4115 }, { "epoch": 2.85140284031867, "grad_norm": 0.829636812210083, "learning_rate": 7.1525658807212204e-06, "loss": 0.0674, "step": 4116 }, { "epoch": 2.852095600969865, "grad_norm": 0.7661008238792419, "learning_rate": 7.151872399445215e-06, "loss": 0.0813, "step": 4117 }, { "epoch": 2.85278836162106, "grad_norm": 0.616958498954773, "learning_rate": 7.15117891816921e-06, "loss": 0.0506, "step": 4118 }, { "epoch": 2.853481122272255, "grad_norm": 0.7801319360733032, "learning_rate": 7.150485436893204e-06, "loss": 0.044, "step": 4119 }, { "epoch": 2.85417388292345, "grad_norm": 0.8029889464378357, "learning_rate": 7.149791955617199e-06, "loss": 0.0722, "step": 4120 }, { "epoch": 2.854866643574645, "grad_norm": 0.7517969012260437, "learning_rate": 7.1490984743411925e-06, "loss": 0.0736, "step": 4121 }, { "epoch": 2.85555940422584, "grad_norm": 0.6682642698287964, "learning_rate": 7.1484049930651875e-06, "loss": 0.0401, "step": 4122 }, { "epoch": 2.856252164877035, "grad_norm": 0.6198400855064392, "learning_rate": 7.147711511789182e-06, "loss": 0.0523, "step": 4123 }, { "epoch": 2.85694492552823, "grad_norm": 0.7525218725204468, "learning_rate": 7.1470180305131765e-06, "loss": 0.0591, "step": 4124 }, { "epoch": 2.857637686179425, "grad_norm": 0.6783974170684814, "learning_rate": 7.1463245492371714e-06, "loss": 0.0627, "step": 4125 }, { "epoch": 2.85833044683062, "grad_norm": 0.813218891620636, "learning_rate": 7.1456310679611655e-06, "loss": 0.0694, "step": 4126 }, { "epoch": 2.859023207481815, "grad_norm": 0.7259506583213806, "learning_rate": 7.14493758668516e-06, "loss": 0.0514, "step": 4127 }, { "epoch": 2.85971596813301, "grad_norm": 0.7025173902511597, "learning_rate": 7.1442441054091545e-06, "loss": 0.0607, "step": 4128 }, { "epoch": 2.860408728784205, "grad_norm": 0.9219744801521301, "learning_rate": 7.143550624133149e-06, "loss": 0.0581, "step": 4129 }, { "epoch": 2.8611014894354003, "grad_norm": 0.6772065162658691, "learning_rate": 7.1428571428571436e-06, "loss": 0.0534, "step": 4130 }, { "epoch": 2.861794250086595, "grad_norm": 0.6437684893608093, "learning_rate": 7.142163661581138e-06, "loss": 0.0587, "step": 4131 }, { "epoch": 2.86248701073779, "grad_norm": 0.7225500345230103, "learning_rate": 7.141470180305133e-06, "loss": 0.053, "step": 4132 }, { "epoch": 2.8631797713889853, "grad_norm": 0.6994206309318542, "learning_rate": 7.1407766990291275e-06, "loss": 0.054, "step": 4133 }, { "epoch": 2.86387253204018, "grad_norm": 0.7558009624481201, "learning_rate": 7.140083217753121e-06, "loss": 0.0637, "step": 4134 }, { "epoch": 2.864565292691375, "grad_norm": 0.7665509581565857, "learning_rate": 7.139389736477116e-06, "loss": 0.0653, "step": 4135 }, { "epoch": 2.86525805334257, "grad_norm": 0.8576058745384216, "learning_rate": 7.13869625520111e-06, "loss": 0.0799, "step": 4136 }, { "epoch": 2.865950813993765, "grad_norm": 0.681874692440033, "learning_rate": 7.138002773925105e-06, "loss": 0.0614, "step": 4137 }, { "epoch": 2.86664357464496, "grad_norm": 0.7096306085586548, "learning_rate": 7.1373092926491e-06, "loss": 0.0563, "step": 4138 }, { "epoch": 2.867336335296155, "grad_norm": 0.733961284160614, "learning_rate": 7.136615811373093e-06, "loss": 0.0609, "step": 4139 }, { "epoch": 2.8680290959473504, "grad_norm": 0.7193312644958496, "learning_rate": 7.135922330097088e-06, "loss": 0.05, "step": 4140 }, { "epoch": 2.8687218565985453, "grad_norm": 0.841863214969635, "learning_rate": 7.135228848821082e-06, "loss": 0.06, "step": 4141 }, { "epoch": 2.86941461724974, "grad_norm": 0.7233185172080994, "learning_rate": 7.134535367545077e-06, "loss": 0.0513, "step": 4142 }, { "epoch": 2.8701073779009354, "grad_norm": 0.7611878514289856, "learning_rate": 7.133841886269072e-06, "loss": 0.0614, "step": 4143 }, { "epoch": 2.8708001385521302, "grad_norm": 0.6750400066375732, "learning_rate": 7.133148404993066e-06, "loss": 0.045, "step": 4144 }, { "epoch": 2.871492899203325, "grad_norm": 0.6719549298286438, "learning_rate": 7.132454923717061e-06, "loss": 0.0612, "step": 4145 }, { "epoch": 2.8721856598545203, "grad_norm": 0.6650805473327637, "learning_rate": 7.131761442441054e-06, "loss": 0.0481, "step": 4146 }, { "epoch": 2.872878420505715, "grad_norm": 0.6743792295455933, "learning_rate": 7.131067961165049e-06, "loss": 0.0498, "step": 4147 }, { "epoch": 2.87357118115691, "grad_norm": 0.7500253319740295, "learning_rate": 7.130374479889044e-06, "loss": 0.064, "step": 4148 }, { "epoch": 2.8742639418081053, "grad_norm": 0.6445143818855286, "learning_rate": 7.129680998613038e-06, "loss": 0.0423, "step": 4149 }, { "epoch": 2.8749567024593006, "grad_norm": 0.7735868096351624, "learning_rate": 7.128987517337033e-06, "loss": 0.0643, "step": 4150 }, { "epoch": 2.8756494631104954, "grad_norm": 0.8251103758811951, "learning_rate": 7.128294036061026e-06, "loss": 0.0616, "step": 4151 }, { "epoch": 2.8763422237616902, "grad_norm": 0.7739446759223938, "learning_rate": 7.127600554785021e-06, "loss": 0.0564, "step": 4152 }, { "epoch": 2.8770349844128855, "grad_norm": 0.720413327217102, "learning_rate": 7.126907073509016e-06, "loss": 0.0544, "step": 4153 }, { "epoch": 2.8777277450640804, "grad_norm": 0.7851538062095642, "learning_rate": 7.12621359223301e-06, "loss": 0.0584, "step": 4154 }, { "epoch": 2.878420505715275, "grad_norm": 0.6678401231765747, "learning_rate": 7.125520110957005e-06, "loss": 0.0536, "step": 4155 }, { "epoch": 2.8791132663664705, "grad_norm": 0.6614765524864197, "learning_rate": 7.124826629680998e-06, "loss": 0.0478, "step": 4156 }, { "epoch": 2.8798060270176653, "grad_norm": 0.7225939631462097, "learning_rate": 7.124133148404993e-06, "loss": 0.053, "step": 4157 }, { "epoch": 2.88049878766886, "grad_norm": 0.7855435609817505, "learning_rate": 7.123439667128988e-06, "loss": 0.0667, "step": 4158 }, { "epoch": 2.8811915483200554, "grad_norm": 0.7119595408439636, "learning_rate": 7.122746185852982e-06, "loss": 0.061, "step": 4159 }, { "epoch": 2.8818843089712507, "grad_norm": 0.8931176662445068, "learning_rate": 7.122052704576977e-06, "loss": 0.072, "step": 4160 }, { "epoch": 2.8825770696224455, "grad_norm": 0.8780502080917358, "learning_rate": 7.121359223300971e-06, "loss": 0.0633, "step": 4161 }, { "epoch": 2.8832698302736404, "grad_norm": 0.6471735835075378, "learning_rate": 7.120665742024966e-06, "loss": 0.0423, "step": 4162 }, { "epoch": 2.8839625909248356, "grad_norm": 0.8132211565971375, "learning_rate": 7.119972260748961e-06, "loss": 0.0663, "step": 4163 }, { "epoch": 2.8846553515760305, "grad_norm": 0.9457964301109314, "learning_rate": 7.119278779472954e-06, "loss": 0.0565, "step": 4164 }, { "epoch": 2.8853481122272253, "grad_norm": 0.6737701892852783, "learning_rate": 7.118585298196949e-06, "loss": 0.0548, "step": 4165 }, { "epoch": 2.8860408728784206, "grad_norm": 0.7230066061019897, "learning_rate": 7.117891816920943e-06, "loss": 0.0621, "step": 4166 }, { "epoch": 2.8867336335296154, "grad_norm": 0.8144643306732178, "learning_rate": 7.117198335644938e-06, "loss": 0.0516, "step": 4167 }, { "epoch": 2.8874263941808103, "grad_norm": 0.7155871391296387, "learning_rate": 7.116504854368933e-06, "loss": 0.0458, "step": 4168 }, { "epoch": 2.8881191548320055, "grad_norm": 0.8399498462677002, "learning_rate": 7.1158113730929265e-06, "loss": 0.0661, "step": 4169 }, { "epoch": 2.888811915483201, "grad_norm": 0.763512134552002, "learning_rate": 7.115117891816921e-06, "loss": 0.0485, "step": 4170 }, { "epoch": 2.8895046761343957, "grad_norm": 0.7869910597801208, "learning_rate": 7.1144244105409155e-06, "loss": 0.067, "step": 4171 }, { "epoch": 2.8901974367855905, "grad_norm": 0.7691038846969604, "learning_rate": 7.1137309292649104e-06, "loss": 0.0591, "step": 4172 }, { "epoch": 2.8908901974367858, "grad_norm": 0.911909282207489, "learning_rate": 7.113037447988905e-06, "loss": 0.0581, "step": 4173 }, { "epoch": 2.8915829580879806, "grad_norm": 0.8166438341140747, "learning_rate": 7.1123439667128995e-06, "loss": 0.0611, "step": 4174 }, { "epoch": 2.8922757187391754, "grad_norm": 0.6859363913536072, "learning_rate": 7.1116504854368935e-06, "loss": 0.0539, "step": 4175 }, { "epoch": 2.8929684793903707, "grad_norm": 0.7008057236671448, "learning_rate": 7.110957004160888e-06, "loss": 0.0566, "step": 4176 }, { "epoch": 2.8936612400415656, "grad_norm": 0.760750949382782, "learning_rate": 7.1102635228848826e-06, "loss": 0.0567, "step": 4177 }, { "epoch": 2.8943540006927604, "grad_norm": 0.782202422618866, "learning_rate": 7.1095700416088775e-06, "loss": 0.0566, "step": 4178 }, { "epoch": 2.8950467613439557, "grad_norm": 0.6821407079696655, "learning_rate": 7.108876560332872e-06, "loss": 0.0602, "step": 4179 }, { "epoch": 2.895739521995151, "grad_norm": 0.7670574188232422, "learning_rate": 7.1081830790568665e-06, "loss": 0.0573, "step": 4180 }, { "epoch": 2.896432282646346, "grad_norm": 0.6521784067153931, "learning_rate": 7.10748959778086e-06, "loss": 0.0382, "step": 4181 }, { "epoch": 2.8971250432975406, "grad_norm": 0.7804185748100281, "learning_rate": 7.106796116504855e-06, "loss": 0.0598, "step": 4182 }, { "epoch": 2.897817803948736, "grad_norm": 0.7645688056945801, "learning_rate": 7.10610263522885e-06, "loss": 0.0585, "step": 4183 }, { "epoch": 2.8985105645999307, "grad_norm": 0.7269492149353027, "learning_rate": 7.105409153952844e-06, "loss": 0.0608, "step": 4184 }, { "epoch": 2.8992033252511256, "grad_norm": 0.719804048538208, "learning_rate": 7.104715672676839e-06, "loss": 0.0653, "step": 4185 }, { "epoch": 2.899896085902321, "grad_norm": 0.7282113432884216, "learning_rate": 7.104022191400832e-06, "loss": 0.0591, "step": 4186 }, { "epoch": 2.9005888465535157, "grad_norm": 0.729363739490509, "learning_rate": 7.103328710124827e-06, "loss": 0.054, "step": 4187 }, { "epoch": 2.9012816072047105, "grad_norm": 0.7966277599334717, "learning_rate": 7.102635228848822e-06, "loss": 0.0688, "step": 4188 }, { "epoch": 2.901974367855906, "grad_norm": 0.9300838112831116, "learning_rate": 7.101941747572816e-06, "loss": 0.0559, "step": 4189 }, { "epoch": 2.902667128507101, "grad_norm": 0.777302086353302, "learning_rate": 7.101248266296811e-06, "loss": 0.0712, "step": 4190 }, { "epoch": 2.903359889158296, "grad_norm": 0.785150945186615, "learning_rate": 7.100554785020805e-06, "loss": 0.0558, "step": 4191 }, { "epoch": 2.9040526498094907, "grad_norm": 0.7144959568977356, "learning_rate": 7.0998613037448e-06, "loss": 0.0621, "step": 4192 }, { "epoch": 2.904745410460686, "grad_norm": 0.5931468605995178, "learning_rate": 7.099167822468795e-06, "loss": 0.0442, "step": 4193 }, { "epoch": 2.905438171111881, "grad_norm": 0.8034635186195374, "learning_rate": 7.098474341192788e-06, "loss": 0.061, "step": 4194 }, { "epoch": 2.9061309317630757, "grad_norm": 0.701107382774353, "learning_rate": 7.097780859916783e-06, "loss": 0.0566, "step": 4195 }, { "epoch": 2.906823692414271, "grad_norm": 0.6087248921394348, "learning_rate": 7.097087378640777e-06, "loss": 0.0516, "step": 4196 }, { "epoch": 2.907516453065466, "grad_norm": 0.7192146182060242, "learning_rate": 7.096393897364772e-06, "loss": 0.0553, "step": 4197 }, { "epoch": 2.9082092137166606, "grad_norm": 0.724388837814331, "learning_rate": 7.095700416088767e-06, "loss": 0.075, "step": 4198 }, { "epoch": 2.908901974367856, "grad_norm": 0.6641338467597961, "learning_rate": 7.09500693481276e-06, "loss": 0.0557, "step": 4199 }, { "epoch": 2.909594735019051, "grad_norm": 0.6138565540313721, "learning_rate": 7.094313453536755e-06, "loss": 0.0543, "step": 4200 }, { "epoch": 2.910287495670246, "grad_norm": 0.605147123336792, "learning_rate": 7.093619972260749e-06, "loss": 0.0449, "step": 4201 }, { "epoch": 2.910980256321441, "grad_norm": 0.6798510551452637, "learning_rate": 7.092926490984744e-06, "loss": 0.0477, "step": 4202 }, { "epoch": 2.911673016972636, "grad_norm": 0.7753946781158447, "learning_rate": 7.092233009708739e-06, "loss": 0.0723, "step": 4203 }, { "epoch": 2.912365777623831, "grad_norm": 0.6524571776390076, "learning_rate": 7.091539528432732e-06, "loss": 0.0574, "step": 4204 }, { "epoch": 2.913058538275026, "grad_norm": 0.5583915114402771, "learning_rate": 7.090846047156727e-06, "loss": 0.0473, "step": 4205 }, { "epoch": 2.913751298926221, "grad_norm": 0.8100289106369019, "learning_rate": 7.090152565880721e-06, "loss": 0.0736, "step": 4206 }, { "epoch": 2.914444059577416, "grad_norm": 0.6637985706329346, "learning_rate": 7.089459084604716e-06, "loss": 0.0508, "step": 4207 }, { "epoch": 2.9151368202286108, "grad_norm": 0.8062850832939148, "learning_rate": 7.088765603328711e-06, "loss": 0.0608, "step": 4208 }, { "epoch": 2.915829580879806, "grad_norm": 0.71599280834198, "learning_rate": 7.088072122052705e-06, "loss": 0.0465, "step": 4209 }, { "epoch": 2.9165223415310013, "grad_norm": 0.8217005729675293, "learning_rate": 7.0873786407767e-06, "loss": 0.0792, "step": 4210 }, { "epoch": 2.917215102182196, "grad_norm": 0.6581671833992004, "learning_rate": 7.086685159500693e-06, "loss": 0.0519, "step": 4211 }, { "epoch": 2.917907862833391, "grad_norm": 0.8184213638305664, "learning_rate": 7.085991678224688e-06, "loss": 0.0701, "step": 4212 }, { "epoch": 2.9186006234845863, "grad_norm": 0.6626460552215576, "learning_rate": 7.085298196948683e-06, "loss": 0.0587, "step": 4213 }, { "epoch": 2.919293384135781, "grad_norm": 0.8326358795166016, "learning_rate": 7.084604715672677e-06, "loss": 0.0696, "step": 4214 }, { "epoch": 2.919986144786976, "grad_norm": 0.7991271615028381, "learning_rate": 7.083911234396672e-06, "loss": 0.0597, "step": 4215 }, { "epoch": 2.920678905438171, "grad_norm": 0.6827394366264343, "learning_rate": 7.0832177531206655e-06, "loss": 0.0491, "step": 4216 }, { "epoch": 2.921371666089366, "grad_norm": 0.6888494491577148, "learning_rate": 7.08252427184466e-06, "loss": 0.0546, "step": 4217 }, { "epoch": 2.922064426740561, "grad_norm": 0.6674357056617737, "learning_rate": 7.081830790568655e-06, "loss": 0.0538, "step": 4218 }, { "epoch": 2.922757187391756, "grad_norm": 0.6578598618507385, "learning_rate": 7.0811373092926494e-06, "loss": 0.0526, "step": 4219 }, { "epoch": 2.923449948042951, "grad_norm": 0.7401788234710693, "learning_rate": 7.080443828016644e-06, "loss": 0.0546, "step": 4220 }, { "epoch": 2.9241427086941463, "grad_norm": 0.6942839622497559, "learning_rate": 7.0797503467406385e-06, "loss": 0.0466, "step": 4221 }, { "epoch": 2.924835469345341, "grad_norm": 0.7386929392814636, "learning_rate": 7.079056865464633e-06, "loss": 0.0653, "step": 4222 }, { "epoch": 2.9255282299965364, "grad_norm": 0.9131103754043579, "learning_rate": 7.0783633841886275e-06, "loss": 0.0557, "step": 4223 }, { "epoch": 2.9262209906477312, "grad_norm": 0.8071298003196716, "learning_rate": 7.0776699029126216e-06, "loss": 0.0556, "step": 4224 }, { "epoch": 2.926913751298926, "grad_norm": 0.7347648739814758, "learning_rate": 7.0769764216366165e-06, "loss": 0.0572, "step": 4225 }, { "epoch": 2.9276065119501213, "grad_norm": 0.8403554558753967, "learning_rate": 7.076282940360611e-06, "loss": 0.045, "step": 4226 }, { "epoch": 2.928299272601316, "grad_norm": 0.7159295082092285, "learning_rate": 7.0755894590846055e-06, "loss": 0.0638, "step": 4227 }, { "epoch": 2.928992033252511, "grad_norm": 0.7982239127159119, "learning_rate": 7.0748959778086004e-06, "loss": 0.0652, "step": 4228 }, { "epoch": 2.9296847939037063, "grad_norm": 0.8985694050788879, "learning_rate": 7.074202496532594e-06, "loss": 0.0505, "step": 4229 }, { "epoch": 2.930377554554901, "grad_norm": 0.5979345440864563, "learning_rate": 7.073509015256589e-06, "loss": 0.0401, "step": 4230 }, { "epoch": 2.9310703152060964, "grad_norm": 0.7456455826759338, "learning_rate": 7.072815533980583e-06, "loss": 0.0493, "step": 4231 }, { "epoch": 2.9317630758572912, "grad_norm": 0.6642611622810364, "learning_rate": 7.072122052704578e-06, "loss": 0.055, "step": 4232 }, { "epoch": 2.9324558365084865, "grad_norm": 0.7627031207084656, "learning_rate": 7.0714285714285726e-06, "loss": 0.0686, "step": 4233 }, { "epoch": 2.9331485971596813, "grad_norm": 0.6851139664649963, "learning_rate": 7.070735090152566e-06, "loss": 0.0542, "step": 4234 }, { "epoch": 2.933841357810876, "grad_norm": 0.885277509689331, "learning_rate": 7.070041608876561e-06, "loss": 0.0937, "step": 4235 }, { "epoch": 2.9345341184620715, "grad_norm": 0.8729960918426514, "learning_rate": 7.069348127600555e-06, "loss": 0.0649, "step": 4236 }, { "epoch": 2.9352268791132663, "grad_norm": 0.7657375931739807, "learning_rate": 7.06865464632455e-06, "loss": 0.0663, "step": 4237 }, { "epoch": 2.935919639764461, "grad_norm": 0.6353119611740112, "learning_rate": 7.067961165048545e-06, "loss": 0.0506, "step": 4238 }, { "epoch": 2.9366124004156564, "grad_norm": 0.7135321497917175, "learning_rate": 7.067267683772539e-06, "loss": 0.055, "step": 4239 }, { "epoch": 2.9373051610668512, "grad_norm": 0.7522273063659668, "learning_rate": 7.066574202496534e-06, "loss": 0.0634, "step": 4240 }, { "epoch": 2.9379979217180465, "grad_norm": 0.7556070685386658, "learning_rate": 7.065880721220527e-06, "loss": 0.064, "step": 4241 }, { "epoch": 2.9386906823692414, "grad_norm": 0.7151448726654053, "learning_rate": 7.065187239944522e-06, "loss": 0.0521, "step": 4242 }, { "epoch": 2.9393834430204366, "grad_norm": 0.6853150129318237, "learning_rate": 7.064493758668517e-06, "loss": 0.0514, "step": 4243 }, { "epoch": 2.9400762036716315, "grad_norm": 0.7302935719490051, "learning_rate": 7.063800277392511e-06, "loss": 0.051, "step": 4244 }, { "epoch": 2.9407689643228263, "grad_norm": 0.6857462525367737, "learning_rate": 7.063106796116506e-06, "loss": 0.0495, "step": 4245 }, { "epoch": 2.9414617249740216, "grad_norm": 0.6912993788719177, "learning_rate": 7.062413314840499e-06, "loss": 0.0646, "step": 4246 }, { "epoch": 2.9421544856252164, "grad_norm": 0.6467717289924622, "learning_rate": 7.061719833564494e-06, "loss": 0.0495, "step": 4247 }, { "epoch": 2.9428472462764113, "grad_norm": 0.7279195189476013, "learning_rate": 7.061026352288489e-06, "loss": 0.058, "step": 4248 }, { "epoch": 2.9435400069276065, "grad_norm": 0.8106805682182312, "learning_rate": 7.060332871012483e-06, "loss": 0.0548, "step": 4249 }, { "epoch": 2.9442327675788014, "grad_norm": 0.6478182673454285, "learning_rate": 7.059639389736478e-06, "loss": 0.0555, "step": 4250 }, { "epoch": 2.9449255282299966, "grad_norm": 0.7866591811180115, "learning_rate": 7.058945908460472e-06, "loss": 0.0518, "step": 4251 }, { "epoch": 2.9456182888811915, "grad_norm": 0.6769903302192688, "learning_rate": 7.058252427184467e-06, "loss": 0.0574, "step": 4252 }, { "epoch": 2.9463110495323868, "grad_norm": 0.6291192770004272, "learning_rate": 7.057558945908461e-06, "loss": 0.0512, "step": 4253 }, { "epoch": 2.9470038101835816, "grad_norm": 0.7872965335845947, "learning_rate": 7.056865464632455e-06, "loss": 0.0508, "step": 4254 }, { "epoch": 2.9476965708347764, "grad_norm": 0.7260953783988953, "learning_rate": 7.05617198335645e-06, "loss": 0.0472, "step": 4255 }, { "epoch": 2.9483893314859717, "grad_norm": 0.6566283702850342, "learning_rate": 7.055478502080444e-06, "loss": 0.0469, "step": 4256 }, { "epoch": 2.9490820921371665, "grad_norm": 0.7882493734359741, "learning_rate": 7.054785020804439e-06, "loss": 0.0558, "step": 4257 }, { "epoch": 2.9497748527883614, "grad_norm": 0.8718060255050659, "learning_rate": 7.054091539528434e-06, "loss": 0.0412, "step": 4258 }, { "epoch": 2.9504676134395567, "grad_norm": 0.869996190071106, "learning_rate": 7.053398058252427e-06, "loss": 0.0661, "step": 4259 }, { "epoch": 2.9511603740907515, "grad_norm": 0.7169980406761169, "learning_rate": 7.052704576976422e-06, "loss": 0.0556, "step": 4260 }, { "epoch": 2.9518531347419468, "grad_norm": 0.7835674285888672, "learning_rate": 7.052011095700416e-06, "loss": 0.0589, "step": 4261 }, { "epoch": 2.9525458953931416, "grad_norm": 0.7977503538131714, "learning_rate": 7.051317614424411e-06, "loss": 0.0537, "step": 4262 }, { "epoch": 2.953238656044337, "grad_norm": 0.6363850831985474, "learning_rate": 7.050624133148406e-06, "loss": 0.0463, "step": 4263 }, { "epoch": 2.9539314166955317, "grad_norm": 0.7391969561576843, "learning_rate": 7.049930651872399e-06, "loss": 0.0456, "step": 4264 }, { "epoch": 2.9546241773467266, "grad_norm": 0.8899480700492859, "learning_rate": 7.049237170596394e-06, "loss": 0.051, "step": 4265 }, { "epoch": 2.955316937997922, "grad_norm": 0.8108271360397339, "learning_rate": 7.0485436893203884e-06, "loss": 0.0513, "step": 4266 }, { "epoch": 2.9560096986491167, "grad_norm": 0.6377550959587097, "learning_rate": 7.047850208044383e-06, "loss": 0.0419, "step": 4267 }, { "epoch": 2.9567024593003115, "grad_norm": 0.7099804878234863, "learning_rate": 7.047156726768378e-06, "loss": 0.0541, "step": 4268 }, { "epoch": 2.957395219951507, "grad_norm": 0.6533857583999634, "learning_rate": 7.046463245492372e-06, "loss": 0.044, "step": 4269 }, { "epoch": 2.9580879806027016, "grad_norm": 0.6126710176467896, "learning_rate": 7.045769764216367e-06, "loss": 0.0416, "step": 4270 }, { "epoch": 2.958780741253897, "grad_norm": 0.7803050875663757, "learning_rate": 7.0450762829403606e-06, "loss": 0.0504, "step": 4271 }, { "epoch": 2.9594735019050917, "grad_norm": 0.7303830981254578, "learning_rate": 7.0443828016643555e-06, "loss": 0.0628, "step": 4272 }, { "epoch": 2.960166262556287, "grad_norm": 0.7416102290153503, "learning_rate": 7.0436893203883504e-06, "loss": 0.0591, "step": 4273 }, { "epoch": 2.960859023207482, "grad_norm": 0.9078226089477539, "learning_rate": 7.0429958391123445e-06, "loss": 0.0566, "step": 4274 }, { "epoch": 2.9615517838586767, "grad_norm": 0.7663037776947021, "learning_rate": 7.0423023578363395e-06, "loss": 0.0631, "step": 4275 }, { "epoch": 2.962244544509872, "grad_norm": 0.7635251879692078, "learning_rate": 7.041608876560333e-06, "loss": 0.0668, "step": 4276 }, { "epoch": 2.962937305161067, "grad_norm": 0.7865013480186462, "learning_rate": 7.040915395284328e-06, "loss": 0.0686, "step": 4277 }, { "epoch": 2.9636300658122616, "grad_norm": 0.7668929100036621, "learning_rate": 7.0402219140083226e-06, "loss": 0.0574, "step": 4278 }, { "epoch": 2.964322826463457, "grad_norm": 0.7387571930885315, "learning_rate": 7.039528432732317e-06, "loss": 0.0532, "step": 4279 }, { "epoch": 2.9650155871146517, "grad_norm": 0.71168053150177, "learning_rate": 7.0388349514563116e-06, "loss": 0.0455, "step": 4280 }, { "epoch": 2.965708347765847, "grad_norm": 0.6919817328453064, "learning_rate": 7.038141470180306e-06, "loss": 0.0469, "step": 4281 }, { "epoch": 2.966401108417042, "grad_norm": 0.672911524772644, "learning_rate": 7.0374479889043e-06, "loss": 0.0455, "step": 4282 }, { "epoch": 2.967093869068237, "grad_norm": 0.6759064793586731, "learning_rate": 7.036754507628295e-06, "loss": 0.061, "step": 4283 }, { "epoch": 2.967786629719432, "grad_norm": 0.7007350921630859, "learning_rate": 7.036061026352289e-06, "loss": 0.0607, "step": 4284 }, { "epoch": 2.968479390370627, "grad_norm": 0.681125283241272, "learning_rate": 7.035367545076284e-06, "loss": 0.0529, "step": 4285 }, { "epoch": 2.969172151021822, "grad_norm": 0.6818118095397949, "learning_rate": 7.034674063800278e-06, "loss": 0.0467, "step": 4286 }, { "epoch": 2.969864911673017, "grad_norm": 0.7406013607978821, "learning_rate": 7.033980582524273e-06, "loss": 0.0401, "step": 4287 }, { "epoch": 2.9705576723242118, "grad_norm": 0.5973888039588928, "learning_rate": 7.033287101248268e-06, "loss": 0.0494, "step": 4288 }, { "epoch": 2.971250432975407, "grad_norm": 0.676389217376709, "learning_rate": 7.032593619972261e-06, "loss": 0.0416, "step": 4289 }, { "epoch": 2.971943193626602, "grad_norm": 0.6153448224067688, "learning_rate": 7.031900138696256e-06, "loss": 0.0477, "step": 4290 }, { "epoch": 2.972635954277797, "grad_norm": 0.8072543144226074, "learning_rate": 7.03120665742025e-06, "loss": 0.0766, "step": 4291 }, { "epoch": 2.973328714928992, "grad_norm": 0.8234242796897888, "learning_rate": 7.030513176144245e-06, "loss": 0.0661, "step": 4292 }, { "epoch": 2.9740214755801873, "grad_norm": 0.8944745659828186, "learning_rate": 7.02981969486824e-06, "loss": 0.0539, "step": 4293 }, { "epoch": 2.974714236231382, "grad_norm": 0.7886906266212463, "learning_rate": 7.029126213592233e-06, "loss": 0.0597, "step": 4294 }, { "epoch": 2.975406996882577, "grad_norm": 0.7113933563232422, "learning_rate": 7.028432732316228e-06, "loss": 0.0558, "step": 4295 }, { "epoch": 2.976099757533772, "grad_norm": 0.6581159234046936, "learning_rate": 7.027739251040222e-06, "loss": 0.0441, "step": 4296 }, { "epoch": 2.976792518184967, "grad_norm": 0.6525651216506958, "learning_rate": 7.027045769764217e-06, "loss": 0.0471, "step": 4297 }, { "epoch": 2.977485278836162, "grad_norm": 0.8623197078704834, "learning_rate": 7.026352288488212e-06, "loss": 0.0497, "step": 4298 }, { "epoch": 2.978178039487357, "grad_norm": 0.7325842380523682, "learning_rate": 7.025658807212206e-06, "loss": 0.0577, "step": 4299 }, { "epoch": 2.978870800138552, "grad_norm": 0.7979859709739685, "learning_rate": 7.024965325936201e-06, "loss": 0.0654, "step": 4300 }, { "epoch": 2.9795635607897473, "grad_norm": 0.7051188349723816, "learning_rate": 7.024271844660194e-06, "loss": 0.0598, "step": 4301 }, { "epoch": 2.980256321440942, "grad_norm": 0.6145420670509338, "learning_rate": 7.023578363384189e-06, "loss": 0.0479, "step": 4302 }, { "epoch": 2.9809490820921374, "grad_norm": 0.8106651306152344, "learning_rate": 7.022884882108184e-06, "loss": 0.0475, "step": 4303 }, { "epoch": 2.981641842743332, "grad_norm": 0.6330602169036865, "learning_rate": 7.022191400832178e-06, "loss": 0.0455, "step": 4304 }, { "epoch": 2.982334603394527, "grad_norm": 0.7322973608970642, "learning_rate": 7.021497919556173e-06, "loss": 0.0508, "step": 4305 }, { "epoch": 2.9830273640457223, "grad_norm": 0.5652062892913818, "learning_rate": 7.020804438280166e-06, "loss": 0.0377, "step": 4306 }, { "epoch": 2.983720124696917, "grad_norm": 0.6371831893920898, "learning_rate": 7.020110957004161e-06, "loss": 0.0544, "step": 4307 }, { "epoch": 2.984412885348112, "grad_norm": 0.663821280002594, "learning_rate": 7.019417475728156e-06, "loss": 0.0479, "step": 4308 }, { "epoch": 2.9851056459993073, "grad_norm": 0.7163864970207214, "learning_rate": 7.01872399445215e-06, "loss": 0.0485, "step": 4309 }, { "epoch": 2.985798406650502, "grad_norm": 0.7398287653923035, "learning_rate": 7.018030513176145e-06, "loss": 0.0707, "step": 4310 }, { "epoch": 2.9864911673016974, "grad_norm": 0.6587268710136414, "learning_rate": 7.017337031900138e-06, "loss": 0.0417, "step": 4311 }, { "epoch": 2.9871839279528922, "grad_norm": 0.7377708554267883, "learning_rate": 7.016643550624133e-06, "loss": 0.0634, "step": 4312 }, { "epoch": 2.9878766886040875, "grad_norm": 0.6966869235038757, "learning_rate": 7.015950069348128e-06, "loss": 0.0612, "step": 4313 }, { "epoch": 2.9885694492552823, "grad_norm": 0.6966063380241394, "learning_rate": 7.015256588072122e-06, "loss": 0.0568, "step": 4314 }, { "epoch": 2.989262209906477, "grad_norm": 0.6529871821403503, "learning_rate": 7.014563106796117e-06, "loss": 0.0551, "step": 4315 }, { "epoch": 2.9899549705576725, "grad_norm": 0.6561982035636902, "learning_rate": 7.013869625520111e-06, "loss": 0.0434, "step": 4316 }, { "epoch": 2.9906477312088673, "grad_norm": 0.7706881761550903, "learning_rate": 7.013176144244106e-06, "loss": 0.0445, "step": 4317 }, { "epoch": 2.991340491860062, "grad_norm": 0.6570540070533752, "learning_rate": 7.012482662968101e-06, "loss": 0.0483, "step": 4318 }, { "epoch": 2.9920332525112574, "grad_norm": 0.6899812817573547, "learning_rate": 7.0117891816920945e-06, "loss": 0.0501, "step": 4319 }, { "epoch": 2.9927260131624522, "grad_norm": 0.6786242723464966, "learning_rate": 7.0110957004160894e-06, "loss": 0.0531, "step": 4320 }, { "epoch": 2.9934187738136475, "grad_norm": 0.6994133591651917, "learning_rate": 7.0104022191400835e-06, "loss": 0.0551, "step": 4321 }, { "epoch": 2.9941115344648424, "grad_norm": 0.7612938284873962, "learning_rate": 7.0097087378640785e-06, "loss": 0.0583, "step": 4322 }, { "epoch": 2.9948042951160376, "grad_norm": 0.666746973991394, "learning_rate": 7.009015256588073e-06, "loss": 0.0502, "step": 4323 }, { "epoch": 2.9954970557672325, "grad_norm": 0.7190625071525574, "learning_rate": 7.008321775312067e-06, "loss": 0.0613, "step": 4324 }, { "epoch": 2.9961898164184273, "grad_norm": 0.6754704713821411, "learning_rate": 7.0076282940360616e-06, "loss": 0.0517, "step": 4325 }, { "epoch": 2.9968825770696226, "grad_norm": 0.6130306720733643, "learning_rate": 7.006934812760056e-06, "loss": 0.0473, "step": 4326 }, { "epoch": 2.9975753377208174, "grad_norm": 0.7010437846183777, "learning_rate": 7.006241331484051e-06, "loss": 0.05, "step": 4327 }, { "epoch": 2.9982680983720122, "grad_norm": 0.731165885925293, "learning_rate": 7.0055478502080455e-06, "loss": 0.0523, "step": 4328 }, { "epoch": 2.9989608590232075, "grad_norm": 0.876032292842865, "learning_rate": 7.00485436893204e-06, "loss": 0.0599, "step": 4329 }, { "epoch": 2.9996536196744024, "grad_norm": 0.9039497375488281, "learning_rate": 7.004160887656034e-06, "loss": 0.0774, "step": 4330 }, { "epoch": 2.9996536196744024, "eval_loss": 0.24455492198467255, "eval_runtime": 7659.838, "eval_samples_per_second": 1.044, "eval_steps_per_second": 0.033, "eval_wer": 12.86208462909256, "step": 4330 }, { "epoch": 3.0003463803255976, "grad_norm": 0.6198810935020447, "learning_rate": 7.003467406380028e-06, "loss": 0.0422, "step": 4331 }, { "epoch": 3.0010391409767925, "grad_norm": 0.4685623049736023, "learning_rate": 7.002773925104023e-06, "loss": 0.0315, "step": 4332 }, { "epoch": 3.0017319016279873, "grad_norm": 0.9896730780601501, "learning_rate": 7.002080443828018e-06, "loss": 0.0373, "step": 4333 }, { "epoch": 3.0024246622791826, "grad_norm": 0.6355042457580566, "learning_rate": 7.001386962552012e-06, "loss": 0.0314, "step": 4334 }, { "epoch": 3.0031174229303774, "grad_norm": 0.45164135098457336, "learning_rate": 7.000693481276007e-06, "loss": 0.0297, "step": 4335 }, { "epoch": 3.0038101835815727, "grad_norm": 0.5834810137748718, "learning_rate": 7e-06, "loss": 0.0303, "step": 4336 }, { "epoch": 3.0045029442327675, "grad_norm": 0.458690345287323, "learning_rate": 6.999306518723995e-06, "loss": 0.032, "step": 4337 }, { "epoch": 3.0051957048839624, "grad_norm": 0.5150337815284729, "learning_rate": 6.99861303744799e-06, "loss": 0.0338, "step": 4338 }, { "epoch": 3.0058884655351576, "grad_norm": 0.4686882197856903, "learning_rate": 6.997919556171984e-06, "loss": 0.0246, "step": 4339 }, { "epoch": 3.0065812261863525, "grad_norm": 0.48966941237449646, "learning_rate": 6.997226074895979e-06, "loss": 0.0341, "step": 4340 }, { "epoch": 3.0072739868375478, "grad_norm": 0.5113352537155151, "learning_rate": 6.996532593619972e-06, "loss": 0.0282, "step": 4341 }, { "epoch": 3.0079667474887426, "grad_norm": 0.623214602470398, "learning_rate": 6.995839112343967e-06, "loss": 0.0357, "step": 4342 }, { "epoch": 3.0086595081399374, "grad_norm": 0.5896956920623779, "learning_rate": 6.995145631067962e-06, "loss": 0.0319, "step": 4343 }, { "epoch": 3.0093522687911327, "grad_norm": 0.6594386696815491, "learning_rate": 6.994452149791956e-06, "loss": 0.0354, "step": 4344 }, { "epoch": 3.0100450294423275, "grad_norm": 0.6911342144012451, "learning_rate": 6.993758668515951e-06, "loss": 0.0255, "step": 4345 }, { "epoch": 3.010737790093523, "grad_norm": 0.5812859535217285, "learning_rate": 6.993065187239945e-06, "loss": 0.0361, "step": 4346 }, { "epoch": 3.0114305507447177, "grad_norm": 0.5115344524383545, "learning_rate": 6.99237170596394e-06, "loss": 0.0247, "step": 4347 }, { "epoch": 3.0121233113959125, "grad_norm": 0.5840116739273071, "learning_rate": 6.991678224687935e-06, "loss": 0.0287, "step": 4348 }, { "epoch": 3.0128160720471078, "grad_norm": 0.5522916913032532, "learning_rate": 6.990984743411928e-06, "loss": 0.0258, "step": 4349 }, { "epoch": 3.0135088326983026, "grad_norm": 0.8488218188285828, "learning_rate": 6.990291262135923e-06, "loss": 0.0387, "step": 4350 }, { "epoch": 3.014201593349498, "grad_norm": 0.5356283187866211, "learning_rate": 6.989597780859917e-06, "loss": 0.0373, "step": 4351 }, { "epoch": 3.0148943540006927, "grad_norm": 0.5400470495223999, "learning_rate": 6.988904299583912e-06, "loss": 0.0347, "step": 4352 }, { "epoch": 3.0155871146518876, "grad_norm": 0.4884282648563385, "learning_rate": 6.988210818307907e-06, "loss": 0.0251, "step": 4353 }, { "epoch": 3.016279875303083, "grad_norm": 0.5223973393440247, "learning_rate": 6.9875173370319e-06, "loss": 0.0333, "step": 4354 }, { "epoch": 3.0169726359542777, "grad_norm": 0.6101812124252319, "learning_rate": 6.986823855755895e-06, "loss": 0.024, "step": 4355 }, { "epoch": 3.017665396605473, "grad_norm": 0.604290783405304, "learning_rate": 6.986130374479889e-06, "loss": 0.0272, "step": 4356 }, { "epoch": 3.018358157256668, "grad_norm": 0.5956676006317139, "learning_rate": 6.985436893203884e-06, "loss": 0.0316, "step": 4357 }, { "epoch": 3.0190509179078626, "grad_norm": 0.5751438140869141, "learning_rate": 6.984743411927879e-06, "loss": 0.036, "step": 4358 }, { "epoch": 3.019743678559058, "grad_norm": 0.6272618174552917, "learning_rate": 6.984049930651872e-06, "loss": 0.0336, "step": 4359 }, { "epoch": 3.0204364392102527, "grad_norm": 0.5761919617652893, "learning_rate": 6.983356449375867e-06, "loss": 0.026, "step": 4360 }, { "epoch": 3.021129199861448, "grad_norm": 0.5389761328697205, "learning_rate": 6.982662968099861e-06, "loss": 0.0265, "step": 4361 }, { "epoch": 3.021821960512643, "grad_norm": 0.621613085269928, "learning_rate": 6.981969486823856e-06, "loss": 0.0333, "step": 4362 }, { "epoch": 3.0225147211638377, "grad_norm": 0.44902804493904114, "learning_rate": 6.981276005547851e-06, "loss": 0.0278, "step": 4363 }, { "epoch": 3.023207481815033, "grad_norm": 0.658017635345459, "learning_rate": 6.980582524271845e-06, "loss": 0.0367, "step": 4364 }, { "epoch": 3.023900242466228, "grad_norm": 0.5380011200904846, "learning_rate": 6.97988904299584e-06, "loss": 0.0336, "step": 4365 }, { "epoch": 3.024593003117423, "grad_norm": 0.6718837022781372, "learning_rate": 6.9791955617198335e-06, "loss": 0.0328, "step": 4366 }, { "epoch": 3.025285763768618, "grad_norm": 0.6377173662185669, "learning_rate": 6.9785020804438284e-06, "loss": 0.0352, "step": 4367 }, { "epoch": 3.0259785244198127, "grad_norm": 0.44522419571876526, "learning_rate": 6.977808599167823e-06, "loss": 0.023, "step": 4368 }, { "epoch": 3.026671285071008, "grad_norm": 0.537746250629425, "learning_rate": 6.9771151178918175e-06, "loss": 0.0316, "step": 4369 }, { "epoch": 3.027364045722203, "grad_norm": 0.5502179265022278, "learning_rate": 6.976421636615812e-06, "loss": 0.0331, "step": 4370 }, { "epoch": 3.028056806373398, "grad_norm": 0.602715790271759, "learning_rate": 6.975728155339806e-06, "loss": 0.0254, "step": 4371 }, { "epoch": 3.028749567024593, "grad_norm": 0.5520962476730347, "learning_rate": 6.9750346740638006e-06, "loss": 0.0328, "step": 4372 }, { "epoch": 3.029442327675788, "grad_norm": 0.5209055542945862, "learning_rate": 6.9743411927877955e-06, "loss": 0.0252, "step": 4373 }, { "epoch": 3.030135088326983, "grad_norm": 0.5743737816810608, "learning_rate": 6.97364771151179e-06, "loss": 0.035, "step": 4374 }, { "epoch": 3.030827848978178, "grad_norm": 0.5154269933700562, "learning_rate": 6.9729542302357845e-06, "loss": 0.0238, "step": 4375 }, { "epoch": 3.031520609629373, "grad_norm": 0.5269982218742371, "learning_rate": 6.972260748959779e-06, "loss": 0.027, "step": 4376 }, { "epoch": 3.032213370280568, "grad_norm": 0.5954230427742004, "learning_rate": 6.9715672676837735e-06, "loss": 0.0336, "step": 4377 }, { "epoch": 3.032906130931763, "grad_norm": 0.53389972448349, "learning_rate": 6.970873786407768e-06, "loss": 0.0257, "step": 4378 }, { "epoch": 3.033598891582958, "grad_norm": 0.5607286095619202, "learning_rate": 6.970180305131762e-06, "loss": 0.0397, "step": 4379 }, { "epoch": 3.034291652234153, "grad_norm": 0.5695968270301819, "learning_rate": 6.969486823855757e-06, "loss": 0.0369, "step": 4380 }, { "epoch": 3.0349844128853483, "grad_norm": 0.45668014883995056, "learning_rate": 6.968793342579751e-06, "loss": 0.0265, "step": 4381 }, { "epoch": 3.035677173536543, "grad_norm": 0.462616890668869, "learning_rate": 6.968099861303746e-06, "loss": 0.026, "step": 4382 }, { "epoch": 3.036369934187738, "grad_norm": 0.5351200103759766, "learning_rate": 6.967406380027741e-06, "loss": 0.0243, "step": 4383 }, { "epoch": 3.037062694838933, "grad_norm": 0.5747559070587158, "learning_rate": 6.966712898751734e-06, "loss": 0.0328, "step": 4384 }, { "epoch": 3.037755455490128, "grad_norm": 0.3567298352718353, "learning_rate": 6.966019417475729e-06, "loss": 0.016, "step": 4385 }, { "epoch": 3.0384482161413233, "grad_norm": 0.6422958970069885, "learning_rate": 6.965325936199723e-06, "loss": 0.0256, "step": 4386 }, { "epoch": 3.039140976792518, "grad_norm": 0.7727379202842712, "learning_rate": 6.964632454923718e-06, "loss": 0.0203, "step": 4387 }, { "epoch": 3.039833737443713, "grad_norm": 0.6507742404937744, "learning_rate": 6.963938973647713e-06, "loss": 0.0345, "step": 4388 }, { "epoch": 3.0405264980949083, "grad_norm": 0.48902198672294617, "learning_rate": 6.963245492371706e-06, "loss": 0.025, "step": 4389 }, { "epoch": 3.041219258746103, "grad_norm": 0.527797281742096, "learning_rate": 6.962552011095701e-06, "loss": 0.0286, "step": 4390 }, { "epoch": 3.0419120193972984, "grad_norm": 0.4999096393585205, "learning_rate": 6.961858529819695e-06, "loss": 0.027, "step": 4391 }, { "epoch": 3.042604780048493, "grad_norm": 0.5397186279296875, "learning_rate": 6.96116504854369e-06, "loss": 0.0305, "step": 4392 }, { "epoch": 3.043297540699688, "grad_norm": 0.6479367017745972, "learning_rate": 6.960471567267685e-06, "loss": 0.0343, "step": 4393 }, { "epoch": 3.0439903013508833, "grad_norm": 0.4883427619934082, "learning_rate": 6.959778085991679e-06, "loss": 0.0195, "step": 4394 }, { "epoch": 3.044683062002078, "grad_norm": 0.5787862539291382, "learning_rate": 6.959084604715674e-06, "loss": 0.0311, "step": 4395 }, { "epoch": 3.0453758226532734, "grad_norm": 0.484783411026001, "learning_rate": 6.958391123439667e-06, "loss": 0.0223, "step": 4396 }, { "epoch": 3.0460685833044683, "grad_norm": 0.6791329383850098, "learning_rate": 6.957697642163662e-06, "loss": 0.029, "step": 4397 }, { "epoch": 3.046761343955663, "grad_norm": 0.47099193930625916, "learning_rate": 6.957004160887657e-06, "loss": 0.025, "step": 4398 }, { "epoch": 3.0474541046068584, "grad_norm": 0.5062325596809387, "learning_rate": 6.956310679611651e-06, "loss": 0.0274, "step": 4399 }, { "epoch": 3.0481468652580532, "grad_norm": 0.5030776858329773, "learning_rate": 6.955617198335646e-06, "loss": 0.0308, "step": 4400 }, { "epoch": 3.0488396259092485, "grad_norm": 0.5200594067573547, "learning_rate": 6.954923717059639e-06, "loss": 0.0301, "step": 4401 }, { "epoch": 3.0495323865604433, "grad_norm": 0.4133424758911133, "learning_rate": 6.954230235783634e-06, "loss": 0.0194, "step": 4402 }, { "epoch": 3.050225147211638, "grad_norm": 0.47432467341423035, "learning_rate": 6.953536754507629e-06, "loss": 0.0306, "step": 4403 }, { "epoch": 3.0509179078628335, "grad_norm": 0.5050131678581238, "learning_rate": 6.952843273231623e-06, "loss": 0.0293, "step": 4404 }, { "epoch": 3.0516106685140283, "grad_norm": 0.6067765355110168, "learning_rate": 6.952149791955618e-06, "loss": 0.0358, "step": 4405 }, { "epoch": 3.0523034291652236, "grad_norm": 0.5132820010185242, "learning_rate": 6.951456310679612e-06, "loss": 0.0278, "step": 4406 }, { "epoch": 3.0529961898164184, "grad_norm": 0.5128617286682129, "learning_rate": 6.950762829403606e-06, "loss": 0.0268, "step": 4407 }, { "epoch": 3.0536889504676132, "grad_norm": 0.5275478363037109, "learning_rate": 6.950069348127601e-06, "loss": 0.0255, "step": 4408 }, { "epoch": 3.0543817111188085, "grad_norm": 0.5116069912910461, "learning_rate": 6.949375866851595e-06, "loss": 0.0342, "step": 4409 }, { "epoch": 3.0550744717700034, "grad_norm": 0.591738224029541, "learning_rate": 6.94868238557559e-06, "loss": 0.0293, "step": 4410 }, { "epoch": 3.0557672324211986, "grad_norm": 0.8489149808883667, "learning_rate": 6.947988904299584e-06, "loss": 0.038, "step": 4411 }, { "epoch": 3.0564599930723935, "grad_norm": 0.6078969836235046, "learning_rate": 6.947295423023579e-06, "loss": 0.0438, "step": 4412 }, { "epoch": 3.0571527537235883, "grad_norm": 0.5629372000694275, "learning_rate": 6.946601941747574e-06, "loss": 0.0356, "step": 4413 }, { "epoch": 3.0578455143747836, "grad_norm": 0.5793541669845581, "learning_rate": 6.9459084604715674e-06, "loss": 0.0274, "step": 4414 }, { "epoch": 3.0585382750259784, "grad_norm": 0.4805000126361847, "learning_rate": 6.945214979195562e-06, "loss": 0.0235, "step": 4415 }, { "epoch": 3.0592310356771737, "grad_norm": 0.4964812397956848, "learning_rate": 6.9445214979195565e-06, "loss": 0.0265, "step": 4416 }, { "epoch": 3.0599237963283685, "grad_norm": 0.47828176617622375, "learning_rate": 6.943828016643551e-06, "loss": 0.0265, "step": 4417 }, { "epoch": 3.0606165569795634, "grad_norm": 0.5604119300842285, "learning_rate": 6.943134535367546e-06, "loss": 0.0311, "step": 4418 }, { "epoch": 3.0613093176307586, "grad_norm": 0.551511287689209, "learning_rate": 6.9424410540915396e-06, "loss": 0.0251, "step": 4419 }, { "epoch": 3.0620020782819535, "grad_norm": 0.49019280076026917, "learning_rate": 6.9417475728155345e-06, "loss": 0.0231, "step": 4420 }, { "epoch": 3.0626948389331488, "grad_norm": 0.545318067073822, "learning_rate": 6.941054091539529e-06, "loss": 0.0317, "step": 4421 }, { "epoch": 3.0633875995843436, "grad_norm": 0.504234790802002, "learning_rate": 6.9403606102635235e-06, "loss": 0.0263, "step": 4422 }, { "epoch": 3.0640803602355384, "grad_norm": 0.45859989523887634, "learning_rate": 6.9396671289875184e-06, "loss": 0.0224, "step": 4423 }, { "epoch": 3.0647731208867337, "grad_norm": 0.4985523223876953, "learning_rate": 6.9389736477115125e-06, "loss": 0.0254, "step": 4424 }, { "epoch": 3.0654658815379285, "grad_norm": 0.4899972677230835, "learning_rate": 6.9382801664355075e-06, "loss": 0.0196, "step": 4425 }, { "epoch": 3.066158642189124, "grad_norm": 0.550105631351471, "learning_rate": 6.937586685159501e-06, "loss": 0.0257, "step": 4426 }, { "epoch": 3.0668514028403187, "grad_norm": 0.5070916414260864, "learning_rate": 6.936893203883496e-06, "loss": 0.0209, "step": 4427 }, { "epoch": 3.0675441634915135, "grad_norm": 0.5206437110900879, "learning_rate": 6.9361997226074906e-06, "loss": 0.0268, "step": 4428 }, { "epoch": 3.0682369241427088, "grad_norm": 0.6033762097358704, "learning_rate": 6.935506241331485e-06, "loss": 0.0319, "step": 4429 }, { "epoch": 3.0689296847939036, "grad_norm": 0.6515141725540161, "learning_rate": 6.93481276005548e-06, "loss": 0.0297, "step": 4430 }, { "epoch": 3.069622445445099, "grad_norm": 0.5186117887496948, "learning_rate": 6.934119278779473e-06, "loss": 0.027, "step": 4431 }, { "epoch": 3.0703152060962937, "grad_norm": 0.5309900641441345, "learning_rate": 6.933425797503468e-06, "loss": 0.0318, "step": 4432 }, { "epoch": 3.0710079667474885, "grad_norm": 0.5771611928939819, "learning_rate": 6.932732316227463e-06, "loss": 0.029, "step": 4433 }, { "epoch": 3.071700727398684, "grad_norm": 0.551252543926239, "learning_rate": 6.932038834951457e-06, "loss": 0.0239, "step": 4434 }, { "epoch": 3.0723934880498787, "grad_norm": 0.6136420369148254, "learning_rate": 6.931345353675452e-06, "loss": 0.0407, "step": 4435 }, { "epoch": 3.073086248701074, "grad_norm": 0.5737125277519226, "learning_rate": 6.930651872399445e-06, "loss": 0.0316, "step": 4436 }, { "epoch": 3.0737790093522688, "grad_norm": 0.583035945892334, "learning_rate": 6.92995839112344e-06, "loss": 0.0329, "step": 4437 }, { "epoch": 3.0744717700034636, "grad_norm": 0.5928806662559509, "learning_rate": 6.929264909847435e-06, "loss": 0.0296, "step": 4438 }, { "epoch": 3.075164530654659, "grad_norm": 0.5638597011566162, "learning_rate": 6.928571428571429e-06, "loss": 0.0262, "step": 4439 }, { "epoch": 3.0758572913058537, "grad_norm": 0.6342456936836243, "learning_rate": 6.927877947295424e-06, "loss": 0.0367, "step": 4440 }, { "epoch": 3.076550051957049, "grad_norm": 0.5516890287399292, "learning_rate": 6.927184466019418e-06, "loss": 0.0303, "step": 4441 }, { "epoch": 3.077242812608244, "grad_norm": 0.8059898018836975, "learning_rate": 6.926490984743413e-06, "loss": 0.0357, "step": 4442 }, { "epoch": 3.0779355732594387, "grad_norm": 0.6928458213806152, "learning_rate": 6.925797503467408e-06, "loss": 0.0338, "step": 4443 }, { "epoch": 3.078628333910634, "grad_norm": 0.6038082838058472, "learning_rate": 6.925104022191401e-06, "loss": 0.0225, "step": 4444 }, { "epoch": 3.079321094561829, "grad_norm": 0.44677624106407166, "learning_rate": 6.924410540915396e-06, "loss": 0.0235, "step": 4445 }, { "epoch": 3.080013855213024, "grad_norm": 0.5955592393875122, "learning_rate": 6.92371705963939e-06, "loss": 0.0393, "step": 4446 }, { "epoch": 3.080706615864219, "grad_norm": 0.4668789207935333, "learning_rate": 6.923023578363385e-06, "loss": 0.0279, "step": 4447 }, { "epoch": 3.0813993765154137, "grad_norm": 0.5402735471725464, "learning_rate": 6.92233009708738e-06, "loss": 0.0283, "step": 4448 }, { "epoch": 3.082092137166609, "grad_norm": 0.6488705277442932, "learning_rate": 6.921636615811373e-06, "loss": 0.0216, "step": 4449 }, { "epoch": 3.082784897817804, "grad_norm": 0.665450394153595, "learning_rate": 6.920943134535368e-06, "loss": 0.0311, "step": 4450 }, { "epoch": 3.083477658468999, "grad_norm": 0.5854861736297607, "learning_rate": 6.920249653259362e-06, "loss": 0.0258, "step": 4451 }, { "epoch": 3.084170419120194, "grad_norm": 0.8058608770370483, "learning_rate": 6.919556171983357e-06, "loss": 0.0344, "step": 4452 }, { "epoch": 3.084863179771389, "grad_norm": 0.5175086259841919, "learning_rate": 6.918862690707352e-06, "loss": 0.0234, "step": 4453 }, { "epoch": 3.085555940422584, "grad_norm": 0.4833068549633026, "learning_rate": 6.918169209431346e-06, "loss": 0.0235, "step": 4454 }, { "epoch": 3.086248701073779, "grad_norm": 0.5273468494415283, "learning_rate": 6.91747572815534e-06, "loss": 0.0321, "step": 4455 }, { "epoch": 3.086941461724974, "grad_norm": 0.44908297061920166, "learning_rate": 6.916782246879334e-06, "loss": 0.0273, "step": 4456 }, { "epoch": 3.087634222376169, "grad_norm": 0.5509052276611328, "learning_rate": 6.916088765603329e-06, "loss": 0.035, "step": 4457 }, { "epoch": 3.088326983027364, "grad_norm": 0.5545693635940552, "learning_rate": 6.915395284327324e-06, "loss": 0.0318, "step": 4458 }, { "epoch": 3.089019743678559, "grad_norm": 0.5566913485527039, "learning_rate": 6.914701803051318e-06, "loss": 0.0297, "step": 4459 }, { "epoch": 3.089712504329754, "grad_norm": 0.5710915923118591, "learning_rate": 6.914008321775313e-06, "loss": 0.0291, "step": 4460 }, { "epoch": 3.0904052649809493, "grad_norm": 0.5165517926216125, "learning_rate": 6.9133148404993064e-06, "loss": 0.0253, "step": 4461 }, { "epoch": 3.091098025632144, "grad_norm": 0.5543791651725769, "learning_rate": 6.912621359223301e-06, "loss": 0.0248, "step": 4462 }, { "epoch": 3.091790786283339, "grad_norm": 0.5821418762207031, "learning_rate": 6.911927877947296e-06, "loss": 0.0365, "step": 4463 }, { "epoch": 3.092483546934534, "grad_norm": 0.4341883063316345, "learning_rate": 6.91123439667129e-06, "loss": 0.0227, "step": 4464 }, { "epoch": 3.093176307585729, "grad_norm": 0.5115289688110352, "learning_rate": 6.910540915395285e-06, "loss": 0.0237, "step": 4465 }, { "epoch": 3.0938690682369243, "grad_norm": 0.5711712837219238, "learning_rate": 6.9098474341192786e-06, "loss": 0.0274, "step": 4466 }, { "epoch": 3.094561828888119, "grad_norm": 0.48416972160339355, "learning_rate": 6.9091539528432735e-06, "loss": 0.0237, "step": 4467 }, { "epoch": 3.095254589539314, "grad_norm": 0.6813589930534363, "learning_rate": 6.9084604715672684e-06, "loss": 0.032, "step": 4468 }, { "epoch": 3.0959473501905093, "grad_norm": 0.39871472120285034, "learning_rate": 6.9077669902912625e-06, "loss": 0.0204, "step": 4469 }, { "epoch": 3.096640110841704, "grad_norm": 0.6924740076065063, "learning_rate": 6.9070735090152574e-06, "loss": 0.0406, "step": 4470 }, { "epoch": 3.0973328714928994, "grad_norm": 0.41659021377563477, "learning_rate": 6.9063800277392515e-06, "loss": 0.0195, "step": 4471 }, { "epoch": 3.098025632144094, "grad_norm": 0.5689798593521118, "learning_rate": 6.9056865464632465e-06, "loss": 0.018, "step": 4472 }, { "epoch": 3.098718392795289, "grad_norm": 0.6138285994529724, "learning_rate": 6.904993065187241e-06, "loss": 0.0303, "step": 4473 }, { "epoch": 3.0994111534464843, "grad_norm": 0.48150208592414856, "learning_rate": 6.904299583911235e-06, "loss": 0.0228, "step": 4474 }, { "epoch": 3.100103914097679, "grad_norm": 0.6282781362533569, "learning_rate": 6.9036061026352296e-06, "loss": 0.031, "step": 4475 }, { "epoch": 3.1007966747488744, "grad_norm": 0.4962520897388458, "learning_rate": 6.902912621359224e-06, "loss": 0.0286, "step": 4476 }, { "epoch": 3.1014894354000693, "grad_norm": 0.5997893214225769, "learning_rate": 6.902219140083219e-06, "loss": 0.0352, "step": 4477 }, { "epoch": 3.102182196051264, "grad_norm": 0.5330485105514526, "learning_rate": 6.9015256588072135e-06, "loss": 0.0239, "step": 4478 }, { "epoch": 3.1028749567024594, "grad_norm": 0.5510309934616089, "learning_rate": 6.900832177531207e-06, "loss": 0.0341, "step": 4479 }, { "epoch": 3.103567717353654, "grad_norm": 0.5287234783172607, "learning_rate": 6.900138696255202e-06, "loss": 0.0275, "step": 4480 }, { "epoch": 3.1042604780048495, "grad_norm": 0.473359614610672, "learning_rate": 6.899445214979196e-06, "loss": 0.0274, "step": 4481 }, { "epoch": 3.1049532386560443, "grad_norm": 0.608839213848114, "learning_rate": 6.898751733703191e-06, "loss": 0.0332, "step": 4482 }, { "epoch": 3.105645999307239, "grad_norm": 0.6796181797981262, "learning_rate": 6.898058252427186e-06, "loss": 0.0281, "step": 4483 }, { "epoch": 3.1063387599584344, "grad_norm": 0.7151910066604614, "learning_rate": 6.897364771151179e-06, "loss": 0.0307, "step": 4484 }, { "epoch": 3.1070315206096293, "grad_norm": 0.5276244878768921, "learning_rate": 6.896671289875174e-06, "loss": 0.0271, "step": 4485 }, { "epoch": 3.1077242812608246, "grad_norm": 0.6411841511726379, "learning_rate": 6.895977808599168e-06, "loss": 0.0307, "step": 4486 }, { "epoch": 3.1084170419120194, "grad_norm": 0.584810197353363, "learning_rate": 6.895284327323163e-06, "loss": 0.0366, "step": 4487 }, { "epoch": 3.1091098025632142, "grad_norm": 0.4790900945663452, "learning_rate": 6.894590846047158e-06, "loss": 0.0319, "step": 4488 }, { "epoch": 3.1098025632144095, "grad_norm": 0.5784599781036377, "learning_rate": 6.893897364771152e-06, "loss": 0.0331, "step": 4489 }, { "epoch": 3.1104953238656043, "grad_norm": 0.4899098873138428, "learning_rate": 6.893203883495147e-06, "loss": 0.0244, "step": 4490 }, { "epoch": 3.1111880845167996, "grad_norm": 0.522589921951294, "learning_rate": 6.89251040221914e-06, "loss": 0.03, "step": 4491 }, { "epoch": 3.1118808451679945, "grad_norm": 0.47644123435020447, "learning_rate": 6.891816920943135e-06, "loss": 0.0286, "step": 4492 }, { "epoch": 3.1125736058191893, "grad_norm": 0.49770569801330566, "learning_rate": 6.89112343966713e-06, "loss": 0.0242, "step": 4493 }, { "epoch": 3.1132663664703846, "grad_norm": 0.5135511159896851, "learning_rate": 6.890429958391124e-06, "loss": 0.0295, "step": 4494 }, { "epoch": 3.1139591271215794, "grad_norm": 0.6963201761245728, "learning_rate": 6.889736477115119e-06, "loss": 0.0338, "step": 4495 }, { "epoch": 3.1146518877727747, "grad_norm": 0.4779321253299713, "learning_rate": 6.889042995839112e-06, "loss": 0.0287, "step": 4496 }, { "epoch": 3.1153446484239695, "grad_norm": 0.595662534236908, "learning_rate": 6.888349514563107e-06, "loss": 0.0267, "step": 4497 }, { "epoch": 3.1160374090751644, "grad_norm": 0.637178361415863, "learning_rate": 6.887656033287102e-06, "loss": 0.0332, "step": 4498 }, { "epoch": 3.1167301697263596, "grad_norm": 0.5409026741981506, "learning_rate": 6.886962552011096e-06, "loss": 0.0241, "step": 4499 }, { "epoch": 3.1174229303775545, "grad_norm": 0.8406730890274048, "learning_rate": 6.886269070735091e-06, "loss": 0.0372, "step": 4500 }, { "epoch": 3.1181156910287497, "grad_norm": 0.5198965668678284, "learning_rate": 6.885575589459085e-06, "loss": 0.0321, "step": 4501 }, { "epoch": 3.1188084516799446, "grad_norm": 0.4785172641277313, "learning_rate": 6.88488210818308e-06, "loss": 0.0223, "step": 4502 }, { "epoch": 3.1195012123311394, "grad_norm": 0.5014827847480774, "learning_rate": 6.884188626907074e-06, "loss": 0.0249, "step": 4503 }, { "epoch": 3.1201939729823347, "grad_norm": 0.5796141624450684, "learning_rate": 6.883495145631068e-06, "loss": 0.0307, "step": 4504 }, { "epoch": 3.1208867336335295, "grad_norm": 0.6407247185707092, "learning_rate": 6.882801664355063e-06, "loss": 0.0307, "step": 4505 }, { "epoch": 3.121579494284725, "grad_norm": 0.5700838565826416, "learning_rate": 6.882108183079057e-06, "loss": 0.0359, "step": 4506 }, { "epoch": 3.1222722549359196, "grad_norm": 0.5296303033828735, "learning_rate": 6.881414701803052e-06, "loss": 0.0273, "step": 4507 }, { "epoch": 3.1229650155871145, "grad_norm": 0.5510543584823608, "learning_rate": 6.880721220527047e-06, "loss": 0.0257, "step": 4508 }, { "epoch": 3.1236577762383098, "grad_norm": 0.6245564818382263, "learning_rate": 6.88002773925104e-06, "loss": 0.0243, "step": 4509 }, { "epoch": 3.1243505368895046, "grad_norm": 0.5439155101776123, "learning_rate": 6.879334257975035e-06, "loss": 0.0287, "step": 4510 }, { "epoch": 3.1250432975407, "grad_norm": 0.5894782543182373, "learning_rate": 6.878640776699029e-06, "loss": 0.0334, "step": 4511 }, { "epoch": 3.1257360581918947, "grad_norm": 0.4923941195011139, "learning_rate": 6.877947295423024e-06, "loss": 0.0241, "step": 4512 }, { "epoch": 3.1264288188430895, "grad_norm": 0.4189883768558502, "learning_rate": 6.877253814147019e-06, "loss": 0.02, "step": 4513 }, { "epoch": 3.127121579494285, "grad_norm": 0.5520276427268982, "learning_rate": 6.8765603328710125e-06, "loss": 0.0257, "step": 4514 }, { "epoch": 3.1278143401454797, "grad_norm": 0.5777350664138794, "learning_rate": 6.8758668515950074e-06, "loss": 0.027, "step": 4515 }, { "epoch": 3.128507100796675, "grad_norm": 0.5808957815170288, "learning_rate": 6.8751733703190015e-06, "loss": 0.0354, "step": 4516 }, { "epoch": 3.1291998614478698, "grad_norm": 0.5078085660934448, "learning_rate": 6.8744798890429964e-06, "loss": 0.0323, "step": 4517 }, { "epoch": 3.1298926220990646, "grad_norm": 0.6827059388160706, "learning_rate": 6.873786407766991e-06, "loss": 0.0396, "step": 4518 }, { "epoch": 3.13058538275026, "grad_norm": 0.472192645072937, "learning_rate": 6.8730929264909855e-06, "loss": 0.0206, "step": 4519 }, { "epoch": 3.1312781434014547, "grad_norm": 0.4870913326740265, "learning_rate": 6.87239944521498e-06, "loss": 0.0298, "step": 4520 }, { "epoch": 3.13197090405265, "grad_norm": 0.4849689304828644, "learning_rate": 6.871705963938974e-06, "loss": 0.025, "step": 4521 }, { "epoch": 3.132663664703845, "grad_norm": 0.5785959362983704, "learning_rate": 6.8710124826629686e-06, "loss": 0.0254, "step": 4522 }, { "epoch": 3.1333564253550397, "grad_norm": 0.48383548855781555, "learning_rate": 6.8703190013869635e-06, "loss": 0.0242, "step": 4523 }, { "epoch": 3.134049186006235, "grad_norm": 0.6626015901565552, "learning_rate": 6.869625520110958e-06, "loss": 0.0377, "step": 4524 }, { "epoch": 3.13474194665743, "grad_norm": 0.5851209163665771, "learning_rate": 6.8689320388349525e-06, "loss": 0.0368, "step": 4525 }, { "epoch": 3.135434707308625, "grad_norm": 0.5402666926383972, "learning_rate": 6.868238557558946e-06, "loss": 0.0266, "step": 4526 }, { "epoch": 3.13612746795982, "grad_norm": 0.5914937257766724, "learning_rate": 6.867545076282941e-06, "loss": 0.0315, "step": 4527 }, { "epoch": 3.1368202286110147, "grad_norm": 0.5798299312591553, "learning_rate": 6.866851595006936e-06, "loss": 0.0338, "step": 4528 }, { "epoch": 3.13751298926221, "grad_norm": 0.6052075624465942, "learning_rate": 6.86615811373093e-06, "loss": 0.0291, "step": 4529 }, { "epoch": 3.138205749913405, "grad_norm": 0.5369600653648376, "learning_rate": 6.865464632454925e-06, "loss": 0.0307, "step": 4530 }, { "epoch": 3.1388985105646, "grad_norm": 0.4198368191719055, "learning_rate": 6.864771151178919e-06, "loss": 0.0227, "step": 4531 }, { "epoch": 3.139591271215795, "grad_norm": 0.4274318516254425, "learning_rate": 6.864077669902913e-06, "loss": 0.0204, "step": 4532 }, { "epoch": 3.14028403186699, "grad_norm": 0.5620263814926147, "learning_rate": 6.863384188626908e-06, "loss": 0.0242, "step": 4533 }, { "epoch": 3.140976792518185, "grad_norm": 0.4838273227214813, "learning_rate": 6.862690707350902e-06, "loss": 0.0277, "step": 4534 }, { "epoch": 3.14166955316938, "grad_norm": 0.5215245485305786, "learning_rate": 6.861997226074897e-06, "loss": 0.0322, "step": 4535 }, { "epoch": 3.142362313820575, "grad_norm": 0.6789940595626831, "learning_rate": 6.861303744798891e-06, "loss": 0.028, "step": 4536 }, { "epoch": 3.14305507447177, "grad_norm": 0.6261676549911499, "learning_rate": 6.860610263522886e-06, "loss": 0.0366, "step": 4537 }, { "epoch": 3.143747835122965, "grad_norm": 0.5720306038856506, "learning_rate": 6.859916782246881e-06, "loss": 0.0319, "step": 4538 }, { "epoch": 3.14444059577416, "grad_norm": 0.5291834473609924, "learning_rate": 6.859223300970874e-06, "loss": 0.028, "step": 4539 }, { "epoch": 3.145133356425355, "grad_norm": 0.4897461235523224, "learning_rate": 6.858529819694869e-06, "loss": 0.0233, "step": 4540 }, { "epoch": 3.1458261170765502, "grad_norm": 0.5566214323043823, "learning_rate": 6.857836338418863e-06, "loss": 0.0274, "step": 4541 }, { "epoch": 3.146518877727745, "grad_norm": 0.6070666909217834, "learning_rate": 6.857142857142858e-06, "loss": 0.0309, "step": 4542 }, { "epoch": 3.14721163837894, "grad_norm": 0.7450109720230103, "learning_rate": 6.856449375866853e-06, "loss": 0.0439, "step": 4543 }, { "epoch": 3.147904399030135, "grad_norm": 0.9209780693054199, "learning_rate": 6.855755894590846e-06, "loss": 0.0311, "step": 4544 }, { "epoch": 3.14859715968133, "grad_norm": 0.5648210048675537, "learning_rate": 6.855062413314841e-06, "loss": 0.0313, "step": 4545 }, { "epoch": 3.1492899203325253, "grad_norm": 0.5119938850402832, "learning_rate": 6.854368932038835e-06, "loss": 0.0299, "step": 4546 }, { "epoch": 3.14998268098372, "grad_norm": 0.5387217998504639, "learning_rate": 6.85367545076283e-06, "loss": 0.025, "step": 4547 }, { "epoch": 3.150675441634915, "grad_norm": 0.5152221918106079, "learning_rate": 6.852981969486825e-06, "loss": 0.0281, "step": 4548 }, { "epoch": 3.1513682022861103, "grad_norm": 0.5119185447692871, "learning_rate": 6.852288488210819e-06, "loss": 0.02, "step": 4549 }, { "epoch": 3.152060962937305, "grad_norm": 0.512662947177887, "learning_rate": 6.851595006934814e-06, "loss": 0.0289, "step": 4550 }, { "epoch": 3.1527537235885004, "grad_norm": 0.5649303793907166, "learning_rate": 6.850901525658807e-06, "loss": 0.03, "step": 4551 }, { "epoch": 3.153446484239695, "grad_norm": 0.5543296337127686, "learning_rate": 6.850208044382802e-06, "loss": 0.0345, "step": 4552 }, { "epoch": 3.15413924489089, "grad_norm": 0.6043996214866638, "learning_rate": 6.849514563106797e-06, "loss": 0.0278, "step": 4553 }, { "epoch": 3.1548320055420853, "grad_norm": 0.8201333284378052, "learning_rate": 6.848821081830791e-06, "loss": 0.0336, "step": 4554 }, { "epoch": 3.15552476619328, "grad_norm": 0.5810098052024841, "learning_rate": 6.848127600554786e-06, "loss": 0.0308, "step": 4555 }, { "epoch": 3.1562175268444754, "grad_norm": 0.5499187111854553, "learning_rate": 6.847434119278779e-06, "loss": 0.0345, "step": 4556 }, { "epoch": 3.1569102874956703, "grad_norm": 0.6160211563110352, "learning_rate": 6.846740638002774e-06, "loss": 0.0369, "step": 4557 }, { "epoch": 3.157603048146865, "grad_norm": 0.6396363377571106, "learning_rate": 6.846047156726769e-06, "loss": 0.0375, "step": 4558 }, { "epoch": 3.1582958087980604, "grad_norm": 0.6576462388038635, "learning_rate": 6.845353675450763e-06, "loss": 0.0342, "step": 4559 }, { "epoch": 3.158988569449255, "grad_norm": 0.4900040030479431, "learning_rate": 6.844660194174758e-06, "loss": 0.0267, "step": 4560 }, { "epoch": 3.1596813301004505, "grad_norm": 0.6411131620407104, "learning_rate": 6.843966712898752e-06, "loss": 0.031, "step": 4561 }, { "epoch": 3.1603740907516453, "grad_norm": 0.5328938961029053, "learning_rate": 6.8432732316227464e-06, "loss": 0.0328, "step": 4562 }, { "epoch": 3.16106685140284, "grad_norm": 0.5043810606002808, "learning_rate": 6.842579750346741e-06, "loss": 0.0324, "step": 4563 }, { "epoch": 3.1617596120540354, "grad_norm": 0.5267139077186584, "learning_rate": 6.8418862690707354e-06, "loss": 0.0296, "step": 4564 }, { "epoch": 3.1624523727052303, "grad_norm": 0.4793320596218109, "learning_rate": 6.84119278779473e-06, "loss": 0.0245, "step": 4565 }, { "epoch": 3.1631451333564256, "grad_norm": 0.605859637260437, "learning_rate": 6.8404993065187245e-06, "loss": 0.034, "step": 4566 }, { "epoch": 3.1638378940076204, "grad_norm": 0.5951684713363647, "learning_rate": 6.839805825242719e-06, "loss": 0.0316, "step": 4567 }, { "epoch": 3.1645306546588152, "grad_norm": 0.6127306222915649, "learning_rate": 6.839112343966714e-06, "loss": 0.0357, "step": 4568 }, { "epoch": 3.1652234153100105, "grad_norm": 0.6114452481269836, "learning_rate": 6.8384188626907076e-06, "loss": 0.0294, "step": 4569 }, { "epoch": 3.1659161759612053, "grad_norm": 0.7887547612190247, "learning_rate": 6.8377253814147025e-06, "loss": 0.0286, "step": 4570 }, { "epoch": 3.1666089366124006, "grad_norm": 0.6409960985183716, "learning_rate": 6.837031900138697e-06, "loss": 0.0284, "step": 4571 }, { "epoch": 3.1673016972635955, "grad_norm": 0.4506374001502991, "learning_rate": 6.8363384188626915e-06, "loss": 0.0245, "step": 4572 }, { "epoch": 3.1679944579147903, "grad_norm": 0.8309051990509033, "learning_rate": 6.8356449375866865e-06, "loss": 0.0283, "step": 4573 }, { "epoch": 3.1686872185659856, "grad_norm": 0.5388268828392029, "learning_rate": 6.83495145631068e-06, "loss": 0.0251, "step": 4574 }, { "epoch": 3.1693799792171804, "grad_norm": 0.5915524363517761, "learning_rate": 6.834257975034675e-06, "loss": 0.0277, "step": 4575 }, { "epoch": 3.1700727398683757, "grad_norm": 0.6235132217407227, "learning_rate": 6.833564493758669e-06, "loss": 0.0298, "step": 4576 }, { "epoch": 3.1707655005195705, "grad_norm": 0.5598345398902893, "learning_rate": 6.832871012482664e-06, "loss": 0.0271, "step": 4577 }, { "epoch": 3.1714582611707653, "grad_norm": 0.6950753927230835, "learning_rate": 6.832177531206659e-06, "loss": 0.0384, "step": 4578 }, { "epoch": 3.1721510218219606, "grad_norm": 0.593630850315094, "learning_rate": 6.831484049930653e-06, "loss": 0.0277, "step": 4579 }, { "epoch": 3.1728437824731555, "grad_norm": 0.6155409216880798, "learning_rate": 6.830790568654648e-06, "loss": 0.03, "step": 4580 }, { "epoch": 3.1735365431243507, "grad_norm": 0.6785510182380676, "learning_rate": 6.830097087378641e-06, "loss": 0.0219, "step": 4581 }, { "epoch": 3.1742293037755456, "grad_norm": 0.5539804100990295, "learning_rate": 6.829403606102636e-06, "loss": 0.0251, "step": 4582 }, { "epoch": 3.1749220644267404, "grad_norm": 0.6123777627944946, "learning_rate": 6.828710124826631e-06, "loss": 0.0361, "step": 4583 }, { "epoch": 3.1756148250779357, "grad_norm": 0.5027550458908081, "learning_rate": 6.828016643550625e-06, "loss": 0.0272, "step": 4584 }, { "epoch": 3.1763075857291305, "grad_norm": 0.6634909510612488, "learning_rate": 6.82732316227462e-06, "loss": 0.0361, "step": 4585 }, { "epoch": 3.177000346380326, "grad_norm": 0.5998567342758179, "learning_rate": 6.826629680998613e-06, "loss": 0.0263, "step": 4586 }, { "epoch": 3.1776931070315206, "grad_norm": 0.5641564130783081, "learning_rate": 6.825936199722608e-06, "loss": 0.0241, "step": 4587 }, { "epoch": 3.1783858676827155, "grad_norm": 0.6491827368736267, "learning_rate": 6.825242718446603e-06, "loss": 0.0268, "step": 4588 }, { "epoch": 3.1790786283339107, "grad_norm": 0.5773411393165588, "learning_rate": 6.824549237170597e-06, "loss": 0.0261, "step": 4589 }, { "epoch": 3.1797713889851056, "grad_norm": 0.5505303740501404, "learning_rate": 6.823855755894592e-06, "loss": 0.0247, "step": 4590 }, { "epoch": 3.180464149636301, "grad_norm": 0.49144992232322693, "learning_rate": 6.823162274618585e-06, "loss": 0.0233, "step": 4591 }, { "epoch": 3.1811569102874957, "grad_norm": 0.5164524912834167, "learning_rate": 6.82246879334258e-06, "loss": 0.0316, "step": 4592 }, { "epoch": 3.1818496709386905, "grad_norm": 0.6317377686500549, "learning_rate": 6.821775312066575e-06, "loss": 0.0286, "step": 4593 }, { "epoch": 3.182542431589886, "grad_norm": 0.4366499185562134, "learning_rate": 6.821081830790569e-06, "loss": 0.0219, "step": 4594 }, { "epoch": 3.1832351922410806, "grad_norm": 0.7562986612319946, "learning_rate": 6.820388349514564e-06, "loss": 0.0357, "step": 4595 }, { "epoch": 3.183927952892276, "grad_norm": 0.5120450854301453, "learning_rate": 6.819694868238558e-06, "loss": 0.0321, "step": 4596 }, { "epoch": 3.1846207135434708, "grad_norm": 0.6288981437683105, "learning_rate": 6.819001386962553e-06, "loss": 0.0307, "step": 4597 }, { "epoch": 3.1853134741946656, "grad_norm": 0.5563024878501892, "learning_rate": 6.818307905686548e-06, "loss": 0.0269, "step": 4598 }, { "epoch": 3.186006234845861, "grad_norm": 0.5937584638595581, "learning_rate": 6.817614424410541e-06, "loss": 0.0331, "step": 4599 }, { "epoch": 3.1866989954970557, "grad_norm": 0.6336886286735535, "learning_rate": 6.816920943134536e-06, "loss": 0.0366, "step": 4600 }, { "epoch": 3.187391756148251, "grad_norm": 0.5646620392799377, "learning_rate": 6.81622746185853e-06, "loss": 0.024, "step": 4601 }, { "epoch": 3.188084516799446, "grad_norm": 0.4449456036090851, "learning_rate": 6.815533980582525e-06, "loss": 0.026, "step": 4602 }, { "epoch": 3.1887772774506407, "grad_norm": 0.5936587452888489, "learning_rate": 6.81484049930652e-06, "loss": 0.0328, "step": 4603 }, { "epoch": 3.189470038101836, "grad_norm": 0.5598872900009155, "learning_rate": 6.814147018030513e-06, "loss": 0.0308, "step": 4604 }, { "epoch": 3.1901627987530308, "grad_norm": 0.6282536387443542, "learning_rate": 6.813453536754508e-06, "loss": 0.0311, "step": 4605 }, { "epoch": 3.190855559404226, "grad_norm": 0.79432612657547, "learning_rate": 6.812760055478502e-06, "loss": 0.0305, "step": 4606 }, { "epoch": 3.191548320055421, "grad_norm": 0.5502942800521851, "learning_rate": 6.812066574202497e-06, "loss": 0.0294, "step": 4607 }, { "epoch": 3.1922410807066157, "grad_norm": 0.5728614330291748, "learning_rate": 6.811373092926492e-06, "loss": 0.0279, "step": 4608 }, { "epoch": 3.192933841357811, "grad_norm": 0.5338517427444458, "learning_rate": 6.810679611650486e-06, "loss": 0.0212, "step": 4609 }, { "epoch": 3.193626602009006, "grad_norm": 0.5098572969436646, "learning_rate": 6.80998613037448e-06, "loss": 0.0244, "step": 4610 }, { "epoch": 3.1943193626602007, "grad_norm": 0.557035505771637, "learning_rate": 6.8092926490984744e-06, "loss": 0.0257, "step": 4611 }, { "epoch": 3.195012123311396, "grad_norm": 0.6642405390739441, "learning_rate": 6.808599167822469e-06, "loss": 0.0361, "step": 4612 }, { "epoch": 3.195704883962591, "grad_norm": 0.6308565735816956, "learning_rate": 6.807905686546464e-06, "loss": 0.0228, "step": 4613 }, { "epoch": 3.196397644613786, "grad_norm": 0.6261695027351379, "learning_rate": 6.807212205270458e-06, "loss": 0.035, "step": 4614 }, { "epoch": 3.197090405264981, "grad_norm": 0.5674648284912109, "learning_rate": 6.806518723994453e-06, "loss": 0.0314, "step": 4615 }, { "epoch": 3.197783165916176, "grad_norm": 0.5257681012153625, "learning_rate": 6.8058252427184466e-06, "loss": 0.0287, "step": 4616 }, { "epoch": 3.198475926567371, "grad_norm": 0.5438860058784485, "learning_rate": 6.8051317614424415e-06, "loss": 0.0233, "step": 4617 }, { "epoch": 3.199168687218566, "grad_norm": 0.6246986389160156, "learning_rate": 6.8044382801664364e-06, "loss": 0.0307, "step": 4618 }, { "epoch": 3.199861447869761, "grad_norm": 0.6030694842338562, "learning_rate": 6.8037447988904305e-06, "loss": 0.03, "step": 4619 }, { "epoch": 3.200554208520956, "grad_norm": 0.6196433305740356, "learning_rate": 6.8030513176144255e-06, "loss": 0.0373, "step": 4620 }, { "epoch": 3.201246969172151, "grad_norm": 0.5739027261734009, "learning_rate": 6.802357836338419e-06, "loss": 0.0322, "step": 4621 }, { "epoch": 3.201939729823346, "grad_norm": 0.5053880214691162, "learning_rate": 6.801664355062414e-06, "loss": 0.0283, "step": 4622 }, { "epoch": 3.202632490474541, "grad_norm": 0.46181535720825195, "learning_rate": 6.8009708737864086e-06, "loss": 0.0244, "step": 4623 }, { "epoch": 3.203325251125736, "grad_norm": 0.6071468591690063, "learning_rate": 6.800277392510403e-06, "loss": 0.0298, "step": 4624 }, { "epoch": 3.204018011776931, "grad_norm": 0.4515599012374878, "learning_rate": 6.799583911234398e-06, "loss": 0.0227, "step": 4625 }, { "epoch": 3.2047107724281263, "grad_norm": 0.5076680183410645, "learning_rate": 6.798890429958392e-06, "loss": 0.0206, "step": 4626 }, { "epoch": 3.205403533079321, "grad_norm": 0.5873421430587769, "learning_rate": 6.798196948682387e-06, "loss": 0.0339, "step": 4627 }, { "epoch": 3.206096293730516, "grad_norm": 0.6761807203292847, "learning_rate": 6.7975034674063815e-06, "loss": 0.0279, "step": 4628 }, { "epoch": 3.2067890543817112, "grad_norm": 0.4351724684238434, "learning_rate": 6.796809986130375e-06, "loss": 0.0194, "step": 4629 }, { "epoch": 3.207481815032906, "grad_norm": 0.515922486782074, "learning_rate": 6.79611650485437e-06, "loss": 0.026, "step": 4630 }, { "epoch": 3.208174575684101, "grad_norm": 0.5686444640159607, "learning_rate": 6.795423023578364e-06, "loss": 0.0347, "step": 4631 }, { "epoch": 3.208867336335296, "grad_norm": 0.5975972414016724, "learning_rate": 6.794729542302359e-06, "loss": 0.0415, "step": 4632 }, { "epoch": 3.209560096986491, "grad_norm": 0.5428729057312012, "learning_rate": 6.794036061026354e-06, "loss": 0.0239, "step": 4633 }, { "epoch": 3.2102528576376863, "grad_norm": 0.6052834987640381, "learning_rate": 6.793342579750347e-06, "loss": 0.033, "step": 4634 }, { "epoch": 3.210945618288881, "grad_norm": 0.5841012001037598, "learning_rate": 6.792649098474342e-06, "loss": 0.0341, "step": 4635 }, { "epoch": 3.2116383789400764, "grad_norm": 0.48423075675964355, "learning_rate": 6.791955617198336e-06, "loss": 0.028, "step": 4636 }, { "epoch": 3.2123311395912713, "grad_norm": 0.487452894449234, "learning_rate": 6.791262135922331e-06, "loss": 0.0281, "step": 4637 }, { "epoch": 3.213023900242466, "grad_norm": 0.600516676902771, "learning_rate": 6.790568654646326e-06, "loss": 0.0323, "step": 4638 }, { "epoch": 3.2137166608936614, "grad_norm": 0.6476401090621948, "learning_rate": 6.789875173370319e-06, "loss": 0.035, "step": 4639 }, { "epoch": 3.214409421544856, "grad_norm": 0.5982544422149658, "learning_rate": 6.789181692094314e-06, "loss": 0.0345, "step": 4640 }, { "epoch": 3.215102182196051, "grad_norm": 0.5159494876861572, "learning_rate": 6.788488210818308e-06, "loss": 0.0235, "step": 4641 }, { "epoch": 3.2157949428472463, "grad_norm": 0.628759503364563, "learning_rate": 6.787794729542303e-06, "loss": 0.0303, "step": 4642 }, { "epoch": 3.216487703498441, "grad_norm": 0.4892570376396179, "learning_rate": 6.787101248266298e-06, "loss": 0.0213, "step": 4643 }, { "epoch": 3.2171804641496364, "grad_norm": 0.5421319603919983, "learning_rate": 6.786407766990292e-06, "loss": 0.0296, "step": 4644 }, { "epoch": 3.2178732248008313, "grad_norm": 0.7370326519012451, "learning_rate": 6.785714285714287e-06, "loss": 0.029, "step": 4645 }, { "epoch": 3.2185659854520265, "grad_norm": 0.5076510310173035, "learning_rate": 6.78502080443828e-06, "loss": 0.0331, "step": 4646 }, { "epoch": 3.2192587461032214, "grad_norm": 0.5268062949180603, "learning_rate": 6.784327323162275e-06, "loss": 0.0288, "step": 4647 }, { "epoch": 3.219951506754416, "grad_norm": 0.6819027662277222, "learning_rate": 6.78363384188627e-06, "loss": 0.0386, "step": 4648 }, { "epoch": 3.2206442674056115, "grad_norm": 0.7199299931526184, "learning_rate": 6.782940360610264e-06, "loss": 0.0279, "step": 4649 }, { "epoch": 3.2213370280568063, "grad_norm": 0.5368867516517639, "learning_rate": 6.782246879334259e-06, "loss": 0.0289, "step": 4650 }, { "epoch": 3.222029788708001, "grad_norm": 0.4995492398738861, "learning_rate": 6.781553398058252e-06, "loss": 0.0284, "step": 4651 }, { "epoch": 3.2227225493591964, "grad_norm": 0.5490744709968567, "learning_rate": 6.780859916782247e-06, "loss": 0.0305, "step": 4652 }, { "epoch": 3.2234153100103913, "grad_norm": 0.49523964524269104, "learning_rate": 6.780166435506242e-06, "loss": 0.0312, "step": 4653 }, { "epoch": 3.2241080706615866, "grad_norm": 0.5173907279968262, "learning_rate": 6.779472954230236e-06, "loss": 0.0333, "step": 4654 }, { "epoch": 3.2248008313127814, "grad_norm": 0.521741509437561, "learning_rate": 6.778779472954231e-06, "loss": 0.0227, "step": 4655 }, { "epoch": 3.2254935919639767, "grad_norm": 0.5782747864723206, "learning_rate": 6.778085991678225e-06, "loss": 0.0322, "step": 4656 }, { "epoch": 3.2261863526151715, "grad_norm": 0.605554461479187, "learning_rate": 6.77739251040222e-06, "loss": 0.0284, "step": 4657 }, { "epoch": 3.2268791132663663, "grad_norm": 0.573573112487793, "learning_rate": 6.776699029126214e-06, "loss": 0.0348, "step": 4658 }, { "epoch": 3.2275718739175616, "grad_norm": 0.585656464099884, "learning_rate": 6.776005547850208e-06, "loss": 0.0324, "step": 4659 }, { "epoch": 3.2282646345687565, "grad_norm": 0.48179271817207336, "learning_rate": 6.775312066574203e-06, "loss": 0.0246, "step": 4660 }, { "epoch": 3.2289573952199513, "grad_norm": 0.6250631213188171, "learning_rate": 6.774618585298197e-06, "loss": 0.0369, "step": 4661 }, { "epoch": 3.2296501558711466, "grad_norm": 0.5276439785957336, "learning_rate": 6.773925104022192e-06, "loss": 0.0285, "step": 4662 }, { "epoch": 3.2303429165223414, "grad_norm": 0.5746296644210815, "learning_rate": 6.773231622746187e-06, "loss": 0.0218, "step": 4663 }, { "epoch": 3.2310356771735367, "grad_norm": 0.5363883376121521, "learning_rate": 6.7725381414701805e-06, "loss": 0.0296, "step": 4664 }, { "epoch": 3.2317284378247315, "grad_norm": 0.5687172412872314, "learning_rate": 6.7718446601941754e-06, "loss": 0.0298, "step": 4665 }, { "epoch": 3.232421198475927, "grad_norm": 0.5608294010162354, "learning_rate": 6.7711511789181695e-06, "loss": 0.0245, "step": 4666 }, { "epoch": 3.2331139591271216, "grad_norm": 0.6284404993057251, "learning_rate": 6.7704576976421645e-06, "loss": 0.0245, "step": 4667 }, { "epoch": 3.2338067197783165, "grad_norm": 0.5148667693138123, "learning_rate": 6.769764216366159e-06, "loss": 0.0297, "step": 4668 }, { "epoch": 3.2344994804295117, "grad_norm": 0.7157672047615051, "learning_rate": 6.769070735090153e-06, "loss": 0.0356, "step": 4669 }, { "epoch": 3.2351922410807066, "grad_norm": 0.6483341455459595, "learning_rate": 6.7683772538141476e-06, "loss": 0.037, "step": 4670 }, { "epoch": 3.2358850017319014, "grad_norm": 0.5658396482467651, "learning_rate": 6.767683772538142e-06, "loss": 0.0297, "step": 4671 }, { "epoch": 3.2365777623830967, "grad_norm": 0.6210411190986633, "learning_rate": 6.766990291262137e-06, "loss": 0.0327, "step": 4672 }, { "epoch": 3.2372705230342915, "grad_norm": 0.5706200003623962, "learning_rate": 6.7662968099861315e-06, "loss": 0.034, "step": 4673 }, { "epoch": 3.237963283685487, "grad_norm": 0.6245120763778687, "learning_rate": 6.765603328710126e-06, "loss": 0.0323, "step": 4674 }, { "epoch": 3.2386560443366816, "grad_norm": 0.6629225015640259, "learning_rate": 6.7649098474341205e-06, "loss": 0.0355, "step": 4675 }, { "epoch": 3.239348804987877, "grad_norm": 0.5242277383804321, "learning_rate": 6.764216366158114e-06, "loss": 0.0259, "step": 4676 }, { "epoch": 3.2400415656390718, "grad_norm": 0.5269795060157776, "learning_rate": 6.763522884882109e-06, "loss": 0.0242, "step": 4677 }, { "epoch": 3.2407343262902666, "grad_norm": 0.63178950548172, "learning_rate": 6.762829403606104e-06, "loss": 0.036, "step": 4678 }, { "epoch": 3.241427086941462, "grad_norm": 0.4579290449619293, "learning_rate": 6.762135922330098e-06, "loss": 0.0223, "step": 4679 }, { "epoch": 3.2421198475926567, "grad_norm": 0.5542458891868591, "learning_rate": 6.761442441054093e-06, "loss": 0.0345, "step": 4680 }, { "epoch": 3.2428126082438515, "grad_norm": 0.6242253184318542, "learning_rate": 6.760748959778086e-06, "loss": 0.0412, "step": 4681 }, { "epoch": 3.243505368895047, "grad_norm": 0.4446013271808624, "learning_rate": 6.760055478502081e-06, "loss": 0.02, "step": 4682 }, { "epoch": 3.2441981295462416, "grad_norm": 0.5094194412231445, "learning_rate": 6.759361997226076e-06, "loss": 0.025, "step": 4683 }, { "epoch": 3.244890890197437, "grad_norm": 0.6685901880264282, "learning_rate": 6.75866851595007e-06, "loss": 0.032, "step": 4684 }, { "epoch": 3.2455836508486318, "grad_norm": 0.4762212336063385, "learning_rate": 6.757975034674065e-06, "loss": 0.027, "step": 4685 }, { "epoch": 3.246276411499827, "grad_norm": 0.5818709135055542, "learning_rate": 6.757281553398059e-06, "loss": 0.0304, "step": 4686 }, { "epoch": 3.246969172151022, "grad_norm": 0.5958693623542786, "learning_rate": 6.756588072122053e-06, "loss": 0.0322, "step": 4687 }, { "epoch": 3.2476619328022167, "grad_norm": 0.5693372488021851, "learning_rate": 6.755894590846048e-06, "loss": 0.0287, "step": 4688 }, { "epoch": 3.248354693453412, "grad_norm": 0.5314691662788391, "learning_rate": 6.755201109570042e-06, "loss": 0.0319, "step": 4689 }, { "epoch": 3.249047454104607, "grad_norm": 0.5831869840621948, "learning_rate": 6.754507628294037e-06, "loss": 0.0287, "step": 4690 }, { "epoch": 3.2497402147558017, "grad_norm": 0.5545961856842041, "learning_rate": 6.753814147018031e-06, "loss": 0.0349, "step": 4691 }, { "epoch": 3.250432975406997, "grad_norm": 0.5970706939697266, "learning_rate": 6.753120665742026e-06, "loss": 0.0257, "step": 4692 }, { "epoch": 3.2511257360581918, "grad_norm": 0.558861255645752, "learning_rate": 6.752427184466021e-06, "loss": 0.0307, "step": 4693 }, { "epoch": 3.251818496709387, "grad_norm": 0.5072506070137024, "learning_rate": 6.751733703190014e-06, "loss": 0.0298, "step": 4694 }, { "epoch": 3.252511257360582, "grad_norm": 0.5481452345848083, "learning_rate": 6.751040221914009e-06, "loss": 0.0296, "step": 4695 }, { "epoch": 3.253204018011777, "grad_norm": 0.5336576104164124, "learning_rate": 6.750346740638003e-06, "loss": 0.029, "step": 4696 }, { "epoch": 3.253896778662972, "grad_norm": 0.6487609148025513, "learning_rate": 6.749653259361998e-06, "loss": 0.0276, "step": 4697 }, { "epoch": 3.254589539314167, "grad_norm": 0.603401780128479, "learning_rate": 6.748959778085991e-06, "loss": 0.0268, "step": 4698 }, { "epoch": 3.255282299965362, "grad_norm": 0.6136023998260498, "learning_rate": 6.748266296809986e-06, "loss": 0.0328, "step": 4699 }, { "epoch": 3.255975060616557, "grad_norm": 0.5705479979515076, "learning_rate": 6.747572815533981e-06, "loss": 0.0315, "step": 4700 }, { "epoch": 3.256667821267752, "grad_norm": 0.5006148815155029, "learning_rate": 6.746879334257975e-06, "loss": 0.0331, "step": 4701 }, { "epoch": 3.257360581918947, "grad_norm": 0.7428138256072998, "learning_rate": 6.74618585298197e-06, "loss": 0.0409, "step": 4702 }, { "epoch": 3.258053342570142, "grad_norm": 0.48673370480537415, "learning_rate": 6.745492371705964e-06, "loss": 0.0276, "step": 4703 }, { "epoch": 3.258746103221337, "grad_norm": 0.6352970004081726, "learning_rate": 6.744798890429959e-06, "loss": 0.0273, "step": 4704 }, { "epoch": 3.259438863872532, "grad_norm": 0.5196688175201416, "learning_rate": 6.744105409153954e-06, "loss": 0.0239, "step": 4705 }, { "epoch": 3.2601316245237273, "grad_norm": 0.5542315244674683, "learning_rate": 6.743411927877947e-06, "loss": 0.0253, "step": 4706 }, { "epoch": 3.260824385174922, "grad_norm": 0.6497322916984558, "learning_rate": 6.742718446601942e-06, "loss": 0.0372, "step": 4707 }, { "epoch": 3.261517145826117, "grad_norm": 0.6121558547019958, "learning_rate": 6.742024965325936e-06, "loss": 0.0377, "step": 4708 }, { "epoch": 3.2622099064773122, "grad_norm": 0.5766769051551819, "learning_rate": 6.741331484049931e-06, "loss": 0.0252, "step": 4709 }, { "epoch": 3.262902667128507, "grad_norm": 0.5521363615989685, "learning_rate": 6.740638002773926e-06, "loss": 0.0315, "step": 4710 }, { "epoch": 3.263595427779702, "grad_norm": 0.6215150356292725, "learning_rate": 6.7399445214979195e-06, "loss": 0.0363, "step": 4711 }, { "epoch": 3.264288188430897, "grad_norm": 0.6115555763244629, "learning_rate": 6.7392510402219144e-06, "loss": 0.028, "step": 4712 }, { "epoch": 3.264980949082092, "grad_norm": 0.4691926836967468, "learning_rate": 6.7385575589459085e-06, "loss": 0.0234, "step": 4713 }, { "epoch": 3.2656737097332873, "grad_norm": 0.529046893119812, "learning_rate": 6.7378640776699035e-06, "loss": 0.0288, "step": 4714 }, { "epoch": 3.266366470384482, "grad_norm": 0.5988357067108154, "learning_rate": 6.737170596393898e-06, "loss": 0.0283, "step": 4715 }, { "epoch": 3.2670592310356774, "grad_norm": 0.5368452072143555, "learning_rate": 6.736477115117892e-06, "loss": 0.0253, "step": 4716 }, { "epoch": 3.2677519916868722, "grad_norm": 0.5600354075431824, "learning_rate": 6.7357836338418866e-06, "loss": 0.0251, "step": 4717 }, { "epoch": 3.268444752338067, "grad_norm": 0.5538088083267212, "learning_rate": 6.735090152565881e-06, "loss": 0.0296, "step": 4718 }, { "epoch": 3.2691375129892624, "grad_norm": 0.5466650128364563, "learning_rate": 6.734396671289876e-06, "loss": 0.0261, "step": 4719 }, { "epoch": 3.269830273640457, "grad_norm": 0.590917706489563, "learning_rate": 6.7337031900138705e-06, "loss": 0.0335, "step": 4720 }, { "epoch": 3.270523034291652, "grad_norm": 0.6178110241889954, "learning_rate": 6.733009708737865e-06, "loss": 0.0315, "step": 4721 }, { "epoch": 3.2712157949428473, "grad_norm": 0.564285933971405, "learning_rate": 6.7323162274618595e-06, "loss": 0.0333, "step": 4722 }, { "epoch": 3.271908555594042, "grad_norm": 0.5403800010681152, "learning_rate": 6.731622746185853e-06, "loss": 0.0252, "step": 4723 }, { "epoch": 3.2726013162452374, "grad_norm": 0.6292010545730591, "learning_rate": 6.730929264909848e-06, "loss": 0.0336, "step": 4724 }, { "epoch": 3.2732940768964323, "grad_norm": 0.5613750219345093, "learning_rate": 6.730235783633843e-06, "loss": 0.0227, "step": 4725 }, { "epoch": 3.2739868375476275, "grad_norm": 0.6914113759994507, "learning_rate": 6.729542302357837e-06, "loss": 0.0242, "step": 4726 }, { "epoch": 3.2746795981988224, "grad_norm": 0.5726519823074341, "learning_rate": 6.728848821081832e-06, "loss": 0.0255, "step": 4727 }, { "epoch": 3.275372358850017, "grad_norm": 0.6543517708778381, "learning_rate": 6.728155339805825e-06, "loss": 0.0421, "step": 4728 }, { "epoch": 3.2760651195012125, "grad_norm": 0.5008887648582458, "learning_rate": 6.72746185852982e-06, "loss": 0.0322, "step": 4729 }, { "epoch": 3.2767578801524073, "grad_norm": 0.5868710875511169, "learning_rate": 6.726768377253815e-06, "loss": 0.0334, "step": 4730 }, { "epoch": 3.277450640803602, "grad_norm": 0.6493781208992004, "learning_rate": 6.726074895977809e-06, "loss": 0.0427, "step": 4731 }, { "epoch": 3.2781434014547974, "grad_norm": 0.5291950702667236, "learning_rate": 6.725381414701804e-06, "loss": 0.0289, "step": 4732 }, { "epoch": 3.2788361621059923, "grad_norm": 0.5385832786560059, "learning_rate": 6.724687933425798e-06, "loss": 0.0313, "step": 4733 }, { "epoch": 3.2795289227571875, "grad_norm": 0.6078015565872192, "learning_rate": 6.723994452149793e-06, "loss": 0.0336, "step": 4734 }, { "epoch": 3.2802216834083824, "grad_norm": 0.5908147692680359, "learning_rate": 6.723300970873787e-06, "loss": 0.0394, "step": 4735 }, { "epoch": 3.2809144440595777, "grad_norm": 0.520795464515686, "learning_rate": 6.722607489597781e-06, "loss": 0.0242, "step": 4736 }, { "epoch": 3.2816072047107725, "grad_norm": 0.6225765943527222, "learning_rate": 6.721914008321776e-06, "loss": 0.0323, "step": 4737 }, { "epoch": 3.2822999653619673, "grad_norm": 0.5009455680847168, "learning_rate": 6.72122052704577e-06, "loss": 0.0344, "step": 4738 }, { "epoch": 3.2829927260131626, "grad_norm": 0.4971337914466858, "learning_rate": 6.720527045769765e-06, "loss": 0.0326, "step": 4739 }, { "epoch": 3.2836854866643574, "grad_norm": 0.6354343295097351, "learning_rate": 6.71983356449376e-06, "loss": 0.0327, "step": 4740 }, { "epoch": 3.2843782473155523, "grad_norm": 0.5460329651832581, "learning_rate": 6.719140083217753e-06, "loss": 0.0251, "step": 4741 }, { "epoch": 3.2850710079667476, "grad_norm": 0.5400301814079285, "learning_rate": 6.718446601941748e-06, "loss": 0.0338, "step": 4742 }, { "epoch": 3.2857637686179424, "grad_norm": 0.5009092092514038, "learning_rate": 6.717753120665742e-06, "loss": 0.0286, "step": 4743 }, { "epoch": 3.2864565292691377, "grad_norm": 0.564020037651062, "learning_rate": 6.717059639389737e-06, "loss": 0.0343, "step": 4744 }, { "epoch": 3.2871492899203325, "grad_norm": 0.621777355670929, "learning_rate": 6.716366158113732e-06, "loss": 0.0373, "step": 4745 }, { "epoch": 3.287842050571528, "grad_norm": 0.5231059193611145, "learning_rate": 6.715672676837725e-06, "loss": 0.0311, "step": 4746 }, { "epoch": 3.2885348112227226, "grad_norm": 0.4751788079738617, "learning_rate": 6.71497919556172e-06, "loss": 0.024, "step": 4747 }, { "epoch": 3.2892275718739175, "grad_norm": 0.5766786336898804, "learning_rate": 6.714285714285714e-06, "loss": 0.0367, "step": 4748 }, { "epoch": 3.2899203325251127, "grad_norm": 0.5312018990516663, "learning_rate": 6.713592233009709e-06, "loss": 0.019, "step": 4749 }, { "epoch": 3.2906130931763076, "grad_norm": 0.5392399430274963, "learning_rate": 6.712898751733704e-06, "loss": 0.0265, "step": 4750 }, { "epoch": 3.2913058538275024, "grad_norm": 0.7252957224845886, "learning_rate": 6.712205270457698e-06, "loss": 0.0332, "step": 4751 }, { "epoch": 3.2919986144786977, "grad_norm": 0.57252436876297, "learning_rate": 6.711511789181693e-06, "loss": 0.0282, "step": 4752 }, { "epoch": 3.2926913751298925, "grad_norm": 1.0105564594268799, "learning_rate": 6.710818307905686e-06, "loss": 0.028, "step": 4753 }, { "epoch": 3.293384135781088, "grad_norm": 0.4887373745441437, "learning_rate": 6.710124826629681e-06, "loss": 0.0264, "step": 4754 }, { "epoch": 3.2940768964322826, "grad_norm": 0.5198420882225037, "learning_rate": 6.709431345353676e-06, "loss": 0.0259, "step": 4755 }, { "epoch": 3.2947696570834775, "grad_norm": 0.6026989817619324, "learning_rate": 6.70873786407767e-06, "loss": 0.027, "step": 4756 }, { "epoch": 3.2954624177346727, "grad_norm": 0.5375710129737854, "learning_rate": 6.708044382801665e-06, "loss": 0.0236, "step": 4757 }, { "epoch": 3.2961551783858676, "grad_norm": 0.5420960783958435, "learning_rate": 6.7073509015256585e-06, "loss": 0.0328, "step": 4758 }, { "epoch": 3.296847939037063, "grad_norm": 0.5559707880020142, "learning_rate": 6.7066574202496534e-06, "loss": 0.0301, "step": 4759 }, { "epoch": 3.2975406996882577, "grad_norm": 0.4434460699558258, "learning_rate": 6.705963938973648e-06, "loss": 0.0242, "step": 4760 }, { "epoch": 3.2982334603394525, "grad_norm": 0.5509433746337891, "learning_rate": 6.7052704576976425e-06, "loss": 0.029, "step": 4761 }, { "epoch": 3.298926220990648, "grad_norm": 0.6564376950263977, "learning_rate": 6.704576976421637e-06, "loss": 0.0284, "step": 4762 }, { "epoch": 3.2996189816418426, "grad_norm": 0.5901053547859192, "learning_rate": 6.7038834951456315e-06, "loss": 0.036, "step": 4763 }, { "epoch": 3.300311742293038, "grad_norm": 0.619019091129303, "learning_rate": 6.7031900138696256e-06, "loss": 0.0348, "step": 4764 }, { "epoch": 3.3010045029442328, "grad_norm": 0.6057151556015015, "learning_rate": 6.7024965325936205e-06, "loss": 0.0338, "step": 4765 }, { "epoch": 3.3016972635954276, "grad_norm": 0.7371007800102234, "learning_rate": 6.701803051317615e-06, "loss": 0.045, "step": 4766 }, { "epoch": 3.302390024246623, "grad_norm": 0.5042701959609985, "learning_rate": 6.7011095700416095e-06, "loss": 0.0231, "step": 4767 }, { "epoch": 3.3030827848978177, "grad_norm": 0.4735625088214874, "learning_rate": 6.700416088765604e-06, "loss": 0.024, "step": 4768 }, { "epoch": 3.303775545549013, "grad_norm": 0.669570803642273, "learning_rate": 6.6997226074895985e-06, "loss": 0.0325, "step": 4769 }, { "epoch": 3.304468306200208, "grad_norm": 0.4947521388530731, "learning_rate": 6.6990291262135935e-06, "loss": 0.0261, "step": 4770 }, { "epoch": 3.3051610668514027, "grad_norm": 0.5756155848503113, "learning_rate": 6.698335644937587e-06, "loss": 0.0366, "step": 4771 }, { "epoch": 3.305853827502598, "grad_norm": 0.5192012786865234, "learning_rate": 6.697642163661582e-06, "loss": 0.0229, "step": 4772 }, { "epoch": 3.3065465881537928, "grad_norm": 0.4495704174041748, "learning_rate": 6.696948682385576e-06, "loss": 0.0247, "step": 4773 }, { "epoch": 3.307239348804988, "grad_norm": 1.2129278182983398, "learning_rate": 6.696255201109571e-06, "loss": 0.0288, "step": 4774 }, { "epoch": 3.307932109456183, "grad_norm": 0.4990065097808838, "learning_rate": 6.695561719833566e-06, "loss": 0.0231, "step": 4775 }, { "epoch": 3.3086248701073777, "grad_norm": 0.6738272905349731, "learning_rate": 6.694868238557559e-06, "loss": 0.0325, "step": 4776 }, { "epoch": 3.309317630758573, "grad_norm": 0.4718707799911499, "learning_rate": 6.694174757281554e-06, "loss": 0.0213, "step": 4777 }, { "epoch": 3.310010391409768, "grad_norm": 0.5815008878707886, "learning_rate": 6.693481276005548e-06, "loss": 0.0245, "step": 4778 }, { "epoch": 3.310703152060963, "grad_norm": 0.6399415135383606, "learning_rate": 6.692787794729543e-06, "loss": 0.0285, "step": 4779 }, { "epoch": 3.311395912712158, "grad_norm": 0.5460225939750671, "learning_rate": 6.692094313453538e-06, "loss": 0.0338, "step": 4780 }, { "epoch": 3.3120886733633528, "grad_norm": 0.6218183040618896, "learning_rate": 6.691400832177532e-06, "loss": 0.0367, "step": 4781 }, { "epoch": 3.312781434014548, "grad_norm": 0.5439281463623047, "learning_rate": 6.690707350901527e-06, "loss": 0.0318, "step": 4782 }, { "epoch": 3.313474194665743, "grad_norm": 0.6438165307044983, "learning_rate": 6.69001386962552e-06, "loss": 0.0356, "step": 4783 }, { "epoch": 3.314166955316938, "grad_norm": 0.759637713432312, "learning_rate": 6.689320388349515e-06, "loss": 0.0304, "step": 4784 }, { "epoch": 3.314859715968133, "grad_norm": 0.6359278559684753, "learning_rate": 6.68862690707351e-06, "loss": 0.0277, "step": 4785 }, { "epoch": 3.315552476619328, "grad_norm": 0.6943196654319763, "learning_rate": 6.687933425797504e-06, "loss": 0.0372, "step": 4786 }, { "epoch": 3.316245237270523, "grad_norm": 0.6850655674934387, "learning_rate": 6.687239944521499e-06, "loss": 0.0316, "step": 4787 }, { "epoch": 3.316937997921718, "grad_norm": 0.5700212121009827, "learning_rate": 6.686546463245492e-06, "loss": 0.0278, "step": 4788 }, { "epoch": 3.3176307585729132, "grad_norm": 0.669863224029541, "learning_rate": 6.685852981969487e-06, "loss": 0.028, "step": 4789 }, { "epoch": 3.318323519224108, "grad_norm": 0.5694512724876404, "learning_rate": 6.685159500693482e-06, "loss": 0.0292, "step": 4790 }, { "epoch": 3.319016279875303, "grad_norm": 0.5469205975532532, "learning_rate": 6.684466019417476e-06, "loss": 0.0274, "step": 4791 }, { "epoch": 3.319709040526498, "grad_norm": 0.5163033604621887, "learning_rate": 6.683772538141471e-06, "loss": 0.0247, "step": 4792 }, { "epoch": 3.320401801177693, "grad_norm": 0.5916407704353333, "learning_rate": 6.683079056865464e-06, "loss": 0.0317, "step": 4793 }, { "epoch": 3.3210945618288883, "grad_norm": 0.6355410218238831, "learning_rate": 6.682385575589459e-06, "loss": 0.0348, "step": 4794 }, { "epoch": 3.321787322480083, "grad_norm": 0.6622012853622437, "learning_rate": 6.681692094313454e-06, "loss": 0.0294, "step": 4795 }, { "epoch": 3.322480083131278, "grad_norm": 0.6014128923416138, "learning_rate": 6.680998613037448e-06, "loss": 0.0282, "step": 4796 }, { "epoch": 3.3231728437824732, "grad_norm": 0.580251157283783, "learning_rate": 6.680305131761443e-06, "loss": 0.028, "step": 4797 }, { "epoch": 3.323865604433668, "grad_norm": 0.5978350639343262, "learning_rate": 6.679611650485437e-06, "loss": 0.0304, "step": 4798 }, { "epoch": 3.3245583650848634, "grad_norm": 0.6344513893127441, "learning_rate": 6.678918169209432e-06, "loss": 0.0349, "step": 4799 }, { "epoch": 3.325251125736058, "grad_norm": 0.5302347540855408, "learning_rate": 6.678224687933427e-06, "loss": 0.0243, "step": 4800 }, { "epoch": 3.325943886387253, "grad_norm": 0.8989347219467163, "learning_rate": 6.67753120665742e-06, "loss": 0.0412, "step": 4801 }, { "epoch": 3.3266366470384483, "grad_norm": 0.5310397148132324, "learning_rate": 6.676837725381415e-06, "loss": 0.0233, "step": 4802 }, { "epoch": 3.327329407689643, "grad_norm": 0.44594675302505493, "learning_rate": 6.676144244105409e-06, "loss": 0.0216, "step": 4803 }, { "epoch": 3.3280221683408384, "grad_norm": 0.5148747563362122, "learning_rate": 6.675450762829404e-06, "loss": 0.0242, "step": 4804 }, { "epoch": 3.3287149289920333, "grad_norm": 0.7338892817497253, "learning_rate": 6.674757281553399e-06, "loss": 0.0314, "step": 4805 }, { "epoch": 3.329407689643228, "grad_norm": 0.511072039604187, "learning_rate": 6.6740638002773924e-06, "loss": 0.0206, "step": 4806 }, { "epoch": 3.3301004502944234, "grad_norm": 0.5795922875404358, "learning_rate": 6.673370319001387e-06, "loss": 0.0273, "step": 4807 }, { "epoch": 3.330793210945618, "grad_norm": 0.6510863900184631, "learning_rate": 6.6726768377253815e-06, "loss": 0.0233, "step": 4808 }, { "epoch": 3.3314859715968135, "grad_norm": 0.6126107573509216, "learning_rate": 6.671983356449376e-06, "loss": 0.0284, "step": 4809 }, { "epoch": 3.3321787322480083, "grad_norm": 0.5978538990020752, "learning_rate": 6.671289875173371e-06, "loss": 0.0308, "step": 4810 }, { "epoch": 3.332871492899203, "grad_norm": 0.4925459623336792, "learning_rate": 6.670596393897365e-06, "loss": 0.0282, "step": 4811 }, { "epoch": 3.3335642535503984, "grad_norm": 0.7590802311897278, "learning_rate": 6.6699029126213595e-06, "loss": 0.0356, "step": 4812 }, { "epoch": 3.3342570142015933, "grad_norm": 0.7301770448684692, "learning_rate": 6.669209431345354e-06, "loss": 0.0281, "step": 4813 }, { "epoch": 3.3349497748527885, "grad_norm": 0.44639384746551514, "learning_rate": 6.6685159500693485e-06, "loss": 0.0244, "step": 4814 }, { "epoch": 3.3356425355039834, "grad_norm": 0.5887128114700317, "learning_rate": 6.6678224687933435e-06, "loss": 0.0294, "step": 4815 }, { "epoch": 3.336335296155178, "grad_norm": 0.6999009847640991, "learning_rate": 6.6671289875173375e-06, "loss": 0.0261, "step": 4816 }, { "epoch": 3.3370280568063735, "grad_norm": 0.6228030920028687, "learning_rate": 6.6664355062413325e-06, "loss": 0.039, "step": 4817 }, { "epoch": 3.3377208174575683, "grad_norm": 0.6191381812095642, "learning_rate": 6.665742024965326e-06, "loss": 0.0323, "step": 4818 }, { "epoch": 3.3384135781087636, "grad_norm": 0.6570302844047546, "learning_rate": 6.665048543689321e-06, "loss": 0.0326, "step": 4819 }, { "epoch": 3.3391063387599584, "grad_norm": 0.5036888718605042, "learning_rate": 6.664355062413316e-06, "loss": 0.0225, "step": 4820 }, { "epoch": 3.3397990994111533, "grad_norm": 0.5116766095161438, "learning_rate": 6.66366158113731e-06, "loss": 0.028, "step": 4821 }, { "epoch": 3.3404918600623486, "grad_norm": 0.5185744166374207, "learning_rate": 6.662968099861305e-06, "loss": 0.0226, "step": 4822 }, { "epoch": 3.3411846207135434, "grad_norm": 0.5043239593505859, "learning_rate": 6.662274618585298e-06, "loss": 0.0252, "step": 4823 }, { "epoch": 3.3418773813647387, "grad_norm": 0.5223549008369446, "learning_rate": 6.661581137309293e-06, "loss": 0.0327, "step": 4824 }, { "epoch": 3.3425701420159335, "grad_norm": 0.5926573872566223, "learning_rate": 6.660887656033288e-06, "loss": 0.0433, "step": 4825 }, { "epoch": 3.3432629026671283, "grad_norm": 0.5235224962234497, "learning_rate": 6.660194174757282e-06, "loss": 0.0226, "step": 4826 }, { "epoch": 3.3439556633183236, "grad_norm": 0.6044923663139343, "learning_rate": 6.659500693481277e-06, "loss": 0.0352, "step": 4827 }, { "epoch": 3.3446484239695184, "grad_norm": 0.5524185299873352, "learning_rate": 6.658807212205271e-06, "loss": 0.0227, "step": 4828 }, { "epoch": 3.3453411846207137, "grad_norm": 0.4465431869029999, "learning_rate": 6.658113730929266e-06, "loss": 0.0238, "step": 4829 }, { "epoch": 3.3460339452719086, "grad_norm": 0.6737369894981384, "learning_rate": 6.657420249653261e-06, "loss": 0.0339, "step": 4830 }, { "epoch": 3.3467267059231034, "grad_norm": 0.5326617360115051, "learning_rate": 6.656726768377254e-06, "loss": 0.029, "step": 4831 }, { "epoch": 3.3474194665742987, "grad_norm": 0.5432512760162354, "learning_rate": 6.656033287101249e-06, "loss": 0.0248, "step": 4832 }, { "epoch": 3.3481122272254935, "grad_norm": 0.6148502826690674, "learning_rate": 6.655339805825243e-06, "loss": 0.0311, "step": 4833 }, { "epoch": 3.348804987876689, "grad_norm": 0.5826267004013062, "learning_rate": 6.654646324549238e-06, "loss": 0.0368, "step": 4834 }, { "epoch": 3.3494977485278836, "grad_norm": 0.5656581521034241, "learning_rate": 6.653952843273233e-06, "loss": 0.0267, "step": 4835 }, { "epoch": 3.3501905091790785, "grad_norm": 0.6797323226928711, "learning_rate": 6.653259361997226e-06, "loss": 0.0282, "step": 4836 }, { "epoch": 3.3508832698302737, "grad_norm": 0.6435461044311523, "learning_rate": 6.652565880721221e-06, "loss": 0.035, "step": 4837 }, { "epoch": 3.3515760304814686, "grad_norm": 0.5476511120796204, "learning_rate": 6.651872399445215e-06, "loss": 0.0285, "step": 4838 }, { "epoch": 3.352268791132664, "grad_norm": 0.4512452483177185, "learning_rate": 6.65117891816921e-06, "loss": 0.0207, "step": 4839 }, { "epoch": 3.3529615517838587, "grad_norm": 0.5661075711250305, "learning_rate": 6.650485436893205e-06, "loss": 0.026, "step": 4840 }, { "epoch": 3.3536543124350535, "grad_norm": 0.5836403965950012, "learning_rate": 6.649791955617198e-06, "loss": 0.0269, "step": 4841 }, { "epoch": 3.354347073086249, "grad_norm": 0.6065439581871033, "learning_rate": 6.649098474341193e-06, "loss": 0.0319, "step": 4842 }, { "epoch": 3.3550398337374436, "grad_norm": 0.5517366528511047, "learning_rate": 6.648404993065187e-06, "loss": 0.0324, "step": 4843 }, { "epoch": 3.355732594388639, "grad_norm": 0.4713611900806427, "learning_rate": 6.647711511789182e-06, "loss": 0.0256, "step": 4844 }, { "epoch": 3.3564253550398337, "grad_norm": 0.6643800139427185, "learning_rate": 6.647018030513177e-06, "loss": 0.0294, "step": 4845 }, { "epoch": 3.3571181156910286, "grad_norm": 0.5426997542381287, "learning_rate": 6.646324549237171e-06, "loss": 0.0305, "step": 4846 }, { "epoch": 3.357810876342224, "grad_norm": 0.5457416772842407, "learning_rate": 6.645631067961166e-06, "loss": 0.0293, "step": 4847 }, { "epoch": 3.3585036369934187, "grad_norm": 0.6430615186691284, "learning_rate": 6.644937586685159e-06, "loss": 0.0297, "step": 4848 }, { "epoch": 3.359196397644614, "grad_norm": 0.6658140420913696, "learning_rate": 6.644244105409154e-06, "loss": 0.0296, "step": 4849 }, { "epoch": 3.359889158295809, "grad_norm": 0.6753531098365784, "learning_rate": 6.643550624133149e-06, "loss": 0.0278, "step": 4850 }, { "epoch": 3.3605819189470036, "grad_norm": 0.5653782486915588, "learning_rate": 6.642857142857143e-06, "loss": 0.0279, "step": 4851 }, { "epoch": 3.361274679598199, "grad_norm": 0.7654158473014832, "learning_rate": 6.642163661581138e-06, "loss": 0.0312, "step": 4852 }, { "epoch": 3.3619674402493938, "grad_norm": 0.495338499546051, "learning_rate": 6.6414701803051314e-06, "loss": 0.024, "step": 4853 }, { "epoch": 3.362660200900589, "grad_norm": 0.7510350346565247, "learning_rate": 6.640776699029126e-06, "loss": 0.0263, "step": 4854 }, { "epoch": 3.363352961551784, "grad_norm": 0.5413582921028137, "learning_rate": 6.640083217753121e-06, "loss": 0.0259, "step": 4855 }, { "epoch": 3.3640457222029787, "grad_norm": 0.5555694699287415, "learning_rate": 6.639389736477115e-06, "loss": 0.0291, "step": 4856 }, { "epoch": 3.364738482854174, "grad_norm": 0.6627641916275024, "learning_rate": 6.63869625520111e-06, "loss": 0.0326, "step": 4857 }, { "epoch": 3.365431243505369, "grad_norm": 0.46388813853263855, "learning_rate": 6.638002773925104e-06, "loss": 0.0217, "step": 4858 }, { "epoch": 3.366124004156564, "grad_norm": 0.603958010673523, "learning_rate": 6.637309292649099e-06, "loss": 0.03, "step": 4859 }, { "epoch": 3.366816764807759, "grad_norm": 0.6201016306877136, "learning_rate": 6.6366158113730934e-06, "loss": 0.022, "step": 4860 }, { "epoch": 3.3675095254589538, "grad_norm": 0.5489681363105774, "learning_rate": 6.6359223300970875e-06, "loss": 0.0275, "step": 4861 }, { "epoch": 3.368202286110149, "grad_norm": 0.5528520345687866, "learning_rate": 6.6352288488210825e-06, "loss": 0.0229, "step": 4862 }, { "epoch": 3.368895046761344, "grad_norm": 0.5541024804115295, "learning_rate": 6.6345353675450765e-06, "loss": 0.0317, "step": 4863 }, { "epoch": 3.369587807412539, "grad_norm": 0.519614040851593, "learning_rate": 6.6338418862690715e-06, "loss": 0.0265, "step": 4864 }, { "epoch": 3.370280568063734, "grad_norm": 0.6692903637886047, "learning_rate": 6.633148404993066e-06, "loss": 0.0298, "step": 4865 }, { "epoch": 3.370973328714929, "grad_norm": 0.6233783960342407, "learning_rate": 6.63245492371706e-06, "loss": 0.0327, "step": 4866 }, { "epoch": 3.371666089366124, "grad_norm": 0.6404414176940918, "learning_rate": 6.631761442441055e-06, "loss": 0.0327, "step": 4867 }, { "epoch": 3.372358850017319, "grad_norm": 0.46966442465782166, "learning_rate": 6.631067961165049e-06, "loss": 0.0196, "step": 4868 }, { "epoch": 3.373051610668514, "grad_norm": 0.6829743981361389, "learning_rate": 6.630374479889044e-06, "loss": 0.0379, "step": 4869 }, { "epoch": 3.373744371319709, "grad_norm": 0.5777661800384521, "learning_rate": 6.6296809986130385e-06, "loss": 0.025, "step": 4870 }, { "epoch": 3.374437131970904, "grad_norm": 0.7087007164955139, "learning_rate": 6.628987517337032e-06, "loss": 0.0283, "step": 4871 }, { "epoch": 3.375129892622099, "grad_norm": 0.5589938163757324, "learning_rate": 6.628294036061027e-06, "loss": 0.029, "step": 4872 }, { "epoch": 3.375822653273294, "grad_norm": 0.5609465837478638, "learning_rate": 6.627600554785021e-06, "loss": 0.0313, "step": 4873 }, { "epoch": 3.376515413924489, "grad_norm": 0.5925002098083496, "learning_rate": 6.626907073509016e-06, "loss": 0.0396, "step": 4874 }, { "epoch": 3.377208174575684, "grad_norm": 0.5495500564575195, "learning_rate": 6.626213592233011e-06, "loss": 0.0216, "step": 4875 }, { "epoch": 3.377900935226879, "grad_norm": 0.564579963684082, "learning_rate": 6.625520110957005e-06, "loss": 0.0229, "step": 4876 }, { "epoch": 3.3785936958780742, "grad_norm": 0.5010783076286316, "learning_rate": 6.624826629681e-06, "loss": 0.0222, "step": 4877 }, { "epoch": 3.379286456529269, "grad_norm": 0.6719745993614197, "learning_rate": 6.624133148404993e-06, "loss": 0.0407, "step": 4878 }, { "epoch": 3.3799792171804643, "grad_norm": 0.5503398776054382, "learning_rate": 6.623439667128988e-06, "loss": 0.0295, "step": 4879 }, { "epoch": 3.380671977831659, "grad_norm": 0.5117623805999756, "learning_rate": 6.622746185852983e-06, "loss": 0.0266, "step": 4880 }, { "epoch": 3.381364738482854, "grad_norm": 0.460734486579895, "learning_rate": 6.622052704576977e-06, "loss": 0.0252, "step": 4881 }, { "epoch": 3.3820574991340493, "grad_norm": 0.5623500347137451, "learning_rate": 6.621359223300972e-06, "loss": 0.0277, "step": 4882 }, { "epoch": 3.382750259785244, "grad_norm": 0.623411238193512, "learning_rate": 6.620665742024965e-06, "loss": 0.0263, "step": 4883 }, { "epoch": 3.383443020436439, "grad_norm": 0.6305052638053894, "learning_rate": 6.61997226074896e-06, "loss": 0.0281, "step": 4884 }, { "epoch": 3.3841357810876342, "grad_norm": 0.5371508598327637, "learning_rate": 6.619278779472955e-06, "loss": 0.0243, "step": 4885 }, { "epoch": 3.384828541738829, "grad_norm": 0.5948777794837952, "learning_rate": 6.618585298196949e-06, "loss": 0.0332, "step": 4886 }, { "epoch": 3.3855213023900244, "grad_norm": 0.6408388614654541, "learning_rate": 6.617891816920944e-06, "loss": 0.0306, "step": 4887 }, { "epoch": 3.386214063041219, "grad_norm": 0.4946375787258148, "learning_rate": 6.617198335644938e-06, "loss": 0.0199, "step": 4888 }, { "epoch": 3.3869068236924145, "grad_norm": 0.5868860483169556, "learning_rate": 6.616504854368933e-06, "loss": 0.0331, "step": 4889 }, { "epoch": 3.3875995843436093, "grad_norm": 0.5085358619689941, "learning_rate": 6.615811373092927e-06, "loss": 0.0259, "step": 4890 }, { "epoch": 3.388292344994804, "grad_norm": 0.7379584312438965, "learning_rate": 6.615117891816921e-06, "loss": 0.0324, "step": 4891 }, { "epoch": 3.3889851056459994, "grad_norm": 0.7220065593719482, "learning_rate": 6.614424410540916e-06, "loss": 0.0345, "step": 4892 }, { "epoch": 3.3896778662971943, "grad_norm": 0.646913468837738, "learning_rate": 6.61373092926491e-06, "loss": 0.0353, "step": 4893 }, { "epoch": 3.390370626948389, "grad_norm": 0.9849861264228821, "learning_rate": 6.613037447988905e-06, "loss": 0.0388, "step": 4894 }, { "epoch": 3.3910633875995844, "grad_norm": 0.5091375112533569, "learning_rate": 6.6123439667129e-06, "loss": 0.036, "step": 4895 }, { "epoch": 3.391756148250779, "grad_norm": 0.5635720491409302, "learning_rate": 6.611650485436893e-06, "loss": 0.0294, "step": 4896 }, { "epoch": 3.3924489089019745, "grad_norm": 0.546768069267273, "learning_rate": 6.610957004160888e-06, "loss": 0.029, "step": 4897 }, { "epoch": 3.3931416695531693, "grad_norm": 0.44892817735671997, "learning_rate": 6.610263522884882e-06, "loss": 0.0202, "step": 4898 }, { "epoch": 3.3938344302043646, "grad_norm": 0.6681339144706726, "learning_rate": 6.609570041608877e-06, "loss": 0.0221, "step": 4899 }, { "epoch": 3.3945271908555594, "grad_norm": 0.5175942778587341, "learning_rate": 6.608876560332872e-06, "loss": 0.023, "step": 4900 }, { "epoch": 3.3952199515067543, "grad_norm": 0.5441368818283081, "learning_rate": 6.608183079056865e-06, "loss": 0.0303, "step": 4901 }, { "epoch": 3.3959127121579495, "grad_norm": 0.5968049168586731, "learning_rate": 6.60748959778086e-06, "loss": 0.0267, "step": 4902 }, { "epoch": 3.3966054728091444, "grad_norm": 0.4440753757953644, "learning_rate": 6.606796116504854e-06, "loss": 0.0168, "step": 4903 }, { "epoch": 3.397298233460339, "grad_norm": 0.5775256752967834, "learning_rate": 6.606102635228849e-06, "loss": 0.0328, "step": 4904 }, { "epoch": 3.3979909941115345, "grad_norm": 0.6890320777893066, "learning_rate": 6.605409153952844e-06, "loss": 0.0307, "step": 4905 }, { "epoch": 3.3986837547627293, "grad_norm": 0.504628598690033, "learning_rate": 6.604715672676838e-06, "loss": 0.0248, "step": 4906 }, { "epoch": 3.3993765154139246, "grad_norm": 0.5967927575111389, "learning_rate": 6.604022191400833e-06, "loss": 0.0335, "step": 4907 }, { "epoch": 3.4000692760651194, "grad_norm": 0.6110968589782715, "learning_rate": 6.6033287101248265e-06, "loss": 0.0357, "step": 4908 }, { "epoch": 3.4007620367163147, "grad_norm": 0.5766600370407104, "learning_rate": 6.6026352288488215e-06, "loss": 0.0329, "step": 4909 }, { "epoch": 3.4014547973675096, "grad_norm": 0.5956555008888245, "learning_rate": 6.601941747572816e-06, "loss": 0.0268, "step": 4910 }, { "epoch": 3.4021475580187044, "grad_norm": 0.8311800360679626, "learning_rate": 6.6012482662968105e-06, "loss": 0.0311, "step": 4911 }, { "epoch": 3.4028403186698997, "grad_norm": 0.5553978681564331, "learning_rate": 6.600554785020805e-06, "loss": 0.0338, "step": 4912 }, { "epoch": 3.4035330793210945, "grad_norm": 0.6768361330032349, "learning_rate": 6.599861303744799e-06, "loss": 0.035, "step": 4913 }, { "epoch": 3.4042258399722893, "grad_norm": 0.46058088541030884, "learning_rate": 6.599167822468794e-06, "loss": 0.0233, "step": 4914 }, { "epoch": 3.4049186006234846, "grad_norm": 0.48217880725860596, "learning_rate": 6.5984743411927885e-06, "loss": 0.0184, "step": 4915 }, { "epoch": 3.4056113612746794, "grad_norm": 0.5471254587173462, "learning_rate": 6.597780859916783e-06, "loss": 0.0304, "step": 4916 }, { "epoch": 3.4063041219258747, "grad_norm": 0.5644373893737793, "learning_rate": 6.5970873786407775e-06, "loss": 0.0293, "step": 4917 }, { "epoch": 3.4069968825770696, "grad_norm": 0.6265426874160767, "learning_rate": 6.596393897364772e-06, "loss": 0.0358, "step": 4918 }, { "epoch": 3.407689643228265, "grad_norm": 0.5866125822067261, "learning_rate": 6.595700416088766e-06, "loss": 0.0266, "step": 4919 }, { "epoch": 3.4083824038794597, "grad_norm": 0.5871446132659912, "learning_rate": 6.595006934812761e-06, "loss": 0.0265, "step": 4920 }, { "epoch": 3.4090751645306545, "grad_norm": 0.5365444421768188, "learning_rate": 6.594313453536755e-06, "loss": 0.0221, "step": 4921 }, { "epoch": 3.40976792518185, "grad_norm": 0.465727835893631, "learning_rate": 6.59361997226075e-06, "loss": 0.0243, "step": 4922 }, { "epoch": 3.4104606858330446, "grad_norm": 0.5453728437423706, "learning_rate": 6.592926490984744e-06, "loss": 0.0279, "step": 4923 }, { "epoch": 3.4111534464842395, "grad_norm": 0.5906558036804199, "learning_rate": 6.592233009708739e-06, "loss": 0.0275, "step": 4924 }, { "epoch": 3.4118462071354347, "grad_norm": 0.6555768251419067, "learning_rate": 6.591539528432734e-06, "loss": 0.0237, "step": 4925 }, { "epoch": 3.4125389677866296, "grad_norm": 0.6605638861656189, "learning_rate": 6.590846047156727e-06, "loss": 0.0325, "step": 4926 }, { "epoch": 3.413231728437825, "grad_norm": 0.598610520362854, "learning_rate": 6.590152565880722e-06, "loss": 0.0376, "step": 4927 }, { "epoch": 3.4139244890890197, "grad_norm": 0.6504144072532654, "learning_rate": 6.589459084604716e-06, "loss": 0.0331, "step": 4928 }, { "epoch": 3.414617249740215, "grad_norm": 0.5567007064819336, "learning_rate": 6.588765603328711e-06, "loss": 0.0223, "step": 4929 }, { "epoch": 3.41531001039141, "grad_norm": 0.5931421518325806, "learning_rate": 6.588072122052706e-06, "loss": 0.0338, "step": 4930 }, { "epoch": 3.4160027710426046, "grad_norm": 0.6375455260276794, "learning_rate": 6.587378640776699e-06, "loss": 0.0291, "step": 4931 }, { "epoch": 3.4166955316938, "grad_norm": 0.6157872080802917, "learning_rate": 6.586685159500694e-06, "loss": 0.0323, "step": 4932 }, { "epoch": 3.4173882923449947, "grad_norm": 0.5205309391021729, "learning_rate": 6.585991678224688e-06, "loss": 0.0223, "step": 4933 }, { "epoch": 3.4180810529961896, "grad_norm": 0.521050751209259, "learning_rate": 6.585298196948683e-06, "loss": 0.0283, "step": 4934 }, { "epoch": 3.418773813647385, "grad_norm": 0.6215744614601135, "learning_rate": 6.584604715672678e-06, "loss": 0.0277, "step": 4935 }, { "epoch": 3.4194665742985797, "grad_norm": 0.47959715127944946, "learning_rate": 6.583911234396672e-06, "loss": 0.02, "step": 4936 }, { "epoch": 3.420159334949775, "grad_norm": 0.6303134560585022, "learning_rate": 6.583217753120667e-06, "loss": 0.0374, "step": 4937 }, { "epoch": 3.42085209560097, "grad_norm": 0.5215510129928589, "learning_rate": 6.58252427184466e-06, "loss": 0.0281, "step": 4938 }, { "epoch": 3.421544856252165, "grad_norm": 0.614483118057251, "learning_rate": 6.581830790568655e-06, "loss": 0.0232, "step": 4939 }, { "epoch": 3.42223761690336, "grad_norm": 0.6388923525810242, "learning_rate": 6.58113730929265e-06, "loss": 0.0273, "step": 4940 }, { "epoch": 3.4229303775545548, "grad_norm": 0.6062755584716797, "learning_rate": 6.580443828016644e-06, "loss": 0.0343, "step": 4941 }, { "epoch": 3.42362313820575, "grad_norm": 0.6559286713600159, "learning_rate": 6.579750346740639e-06, "loss": 0.0336, "step": 4942 }, { "epoch": 3.424315898856945, "grad_norm": 0.6256927251815796, "learning_rate": 6.579056865464632e-06, "loss": 0.0353, "step": 4943 }, { "epoch": 3.4250086595081397, "grad_norm": 0.571751594543457, "learning_rate": 6.578363384188627e-06, "loss": 0.0292, "step": 4944 }, { "epoch": 3.425701420159335, "grad_norm": 0.5033543705940247, "learning_rate": 6.577669902912622e-06, "loss": 0.024, "step": 4945 }, { "epoch": 3.42639418081053, "grad_norm": 0.5530136227607727, "learning_rate": 6.576976421636616e-06, "loss": 0.0333, "step": 4946 }, { "epoch": 3.427086941461725, "grad_norm": 0.6370141506195068, "learning_rate": 6.576282940360611e-06, "loss": 0.035, "step": 4947 }, { "epoch": 3.42777970211292, "grad_norm": 0.5829577445983887, "learning_rate": 6.575589459084604e-06, "loss": 0.0289, "step": 4948 }, { "epoch": 3.428472462764115, "grad_norm": 1.0694974660873413, "learning_rate": 6.574895977808599e-06, "loss": 0.0378, "step": 4949 }, { "epoch": 3.42916522341531, "grad_norm": 0.49449989199638367, "learning_rate": 6.574202496532594e-06, "loss": 0.0259, "step": 4950 }, { "epoch": 3.429857984066505, "grad_norm": 0.5746321678161621, "learning_rate": 6.573509015256588e-06, "loss": 0.0357, "step": 4951 }, { "epoch": 3.4305507447177, "grad_norm": 0.5766549706459045, "learning_rate": 6.572815533980583e-06, "loss": 0.0369, "step": 4952 }, { "epoch": 3.431243505368895, "grad_norm": 0.5804804563522339, "learning_rate": 6.572122052704577e-06, "loss": 0.0377, "step": 4953 }, { "epoch": 3.43193626602009, "grad_norm": 0.5649195909500122, "learning_rate": 6.571428571428572e-06, "loss": 0.0225, "step": 4954 }, { "epoch": 3.432629026671285, "grad_norm": 0.6376426815986633, "learning_rate": 6.570735090152567e-06, "loss": 0.0284, "step": 4955 }, { "epoch": 3.43332178732248, "grad_norm": 0.5937965512275696, "learning_rate": 6.5700416088765605e-06, "loss": 0.0327, "step": 4956 }, { "epoch": 3.4340145479736752, "grad_norm": 0.6171954870223999, "learning_rate": 6.569348127600555e-06, "loss": 0.0385, "step": 4957 }, { "epoch": 3.43470730862487, "grad_norm": 0.7144383192062378, "learning_rate": 6.5686546463245495e-06, "loss": 0.0367, "step": 4958 }, { "epoch": 3.4354000692760653, "grad_norm": 0.6109632849693298, "learning_rate": 6.567961165048544e-06, "loss": 0.0359, "step": 4959 }, { "epoch": 3.43609282992726, "grad_norm": 0.6784688830375671, "learning_rate": 6.567267683772539e-06, "loss": 0.0275, "step": 4960 }, { "epoch": 3.436785590578455, "grad_norm": 0.6274477243423462, "learning_rate": 6.566574202496533e-06, "loss": 0.0341, "step": 4961 }, { "epoch": 3.4374783512296503, "grad_norm": 0.5536825656890869, "learning_rate": 6.5658807212205275e-06, "loss": 0.0248, "step": 4962 }, { "epoch": 3.438171111880845, "grad_norm": 0.5654683113098145, "learning_rate": 6.565187239944522e-06, "loss": 0.0381, "step": 4963 }, { "epoch": 3.43886387253204, "grad_norm": 0.6740880012512207, "learning_rate": 6.5644937586685165e-06, "loss": 0.0247, "step": 4964 }, { "epoch": 3.4395566331832352, "grad_norm": 0.6297820210456848, "learning_rate": 6.5638002773925115e-06, "loss": 0.031, "step": 4965 }, { "epoch": 3.44024939383443, "grad_norm": 0.5881513357162476, "learning_rate": 6.5631067961165056e-06, "loss": 0.0326, "step": 4966 }, { "epoch": 3.4409421544856253, "grad_norm": 0.6150519847869873, "learning_rate": 6.5624133148405e-06, "loss": 0.0377, "step": 4967 }, { "epoch": 3.44163491513682, "grad_norm": 0.5904532074928284, "learning_rate": 6.561719833564494e-06, "loss": 0.0304, "step": 4968 }, { "epoch": 3.4423276757880155, "grad_norm": 0.5624169111251831, "learning_rate": 6.561026352288489e-06, "loss": 0.0307, "step": 4969 }, { "epoch": 3.4430204364392103, "grad_norm": 0.7530590891838074, "learning_rate": 6.560332871012484e-06, "loss": 0.0328, "step": 4970 }, { "epoch": 3.443713197090405, "grad_norm": 0.6149039268493652, "learning_rate": 6.559639389736478e-06, "loss": 0.0247, "step": 4971 }, { "epoch": 3.4444059577416004, "grad_norm": 0.6508607864379883, "learning_rate": 6.558945908460473e-06, "loss": 0.0268, "step": 4972 }, { "epoch": 3.4450987183927952, "grad_norm": 0.6199063062667847, "learning_rate": 6.558252427184466e-06, "loss": 0.0278, "step": 4973 }, { "epoch": 3.44579147904399, "grad_norm": 0.695668637752533, "learning_rate": 6.557558945908461e-06, "loss": 0.0411, "step": 4974 }, { "epoch": 3.4464842396951854, "grad_norm": 0.6539093852043152, "learning_rate": 6.556865464632456e-06, "loss": 0.0326, "step": 4975 }, { "epoch": 3.44717700034638, "grad_norm": 0.5350441932678223, "learning_rate": 6.55617198335645e-06, "loss": 0.0255, "step": 4976 }, { "epoch": 3.4478697609975755, "grad_norm": 0.6660082340240479, "learning_rate": 6.555478502080445e-06, "loss": 0.0379, "step": 4977 }, { "epoch": 3.4485625216487703, "grad_norm": 0.6372496485710144, "learning_rate": 6.554785020804438e-06, "loss": 0.024, "step": 4978 }, { "epoch": 3.4492552822999656, "grad_norm": 0.8061148524284363, "learning_rate": 6.554091539528433e-06, "loss": 0.0353, "step": 4979 }, { "epoch": 3.4499480429511604, "grad_norm": 0.5602715611457825, "learning_rate": 6.553398058252428e-06, "loss": 0.0229, "step": 4980 }, { "epoch": 3.4506408036023553, "grad_norm": 0.610578715801239, "learning_rate": 6.552704576976422e-06, "loss": 0.0353, "step": 4981 }, { "epoch": 3.4513335642535505, "grad_norm": 0.585390031337738, "learning_rate": 6.552011095700417e-06, "loss": 0.031, "step": 4982 }, { "epoch": 3.4520263249047454, "grad_norm": 0.5611028075218201, "learning_rate": 6.551317614424411e-06, "loss": 0.0278, "step": 4983 }, { "epoch": 3.45271908555594, "grad_norm": 0.8584210872650146, "learning_rate": 6.550624133148406e-06, "loss": 0.0362, "step": 4984 }, { "epoch": 3.4534118462071355, "grad_norm": 0.6460939049720764, "learning_rate": 6.549930651872401e-06, "loss": 0.0327, "step": 4985 }, { "epoch": 3.4541046068583303, "grad_norm": 0.5460928082466125, "learning_rate": 6.549237170596394e-06, "loss": 0.0314, "step": 4986 }, { "epoch": 3.4547973675095256, "grad_norm": 0.4444792568683624, "learning_rate": 6.548543689320389e-06, "loss": 0.0232, "step": 4987 }, { "epoch": 3.4554901281607204, "grad_norm": 0.5863757729530334, "learning_rate": 6.547850208044383e-06, "loss": 0.0225, "step": 4988 }, { "epoch": 3.4561828888119157, "grad_norm": 0.6400433778762817, "learning_rate": 6.547156726768378e-06, "loss": 0.0237, "step": 4989 }, { "epoch": 3.4568756494631105, "grad_norm": 0.6727641224861145, "learning_rate": 6.546463245492373e-06, "loss": 0.0396, "step": 4990 }, { "epoch": 3.4575684101143054, "grad_norm": 0.4800737500190735, "learning_rate": 6.545769764216366e-06, "loss": 0.0213, "step": 4991 }, { "epoch": 3.4582611707655007, "grad_norm": 0.6022713780403137, "learning_rate": 6.545076282940361e-06, "loss": 0.0287, "step": 4992 }, { "epoch": 3.4589539314166955, "grad_norm": 0.607678234577179, "learning_rate": 6.544382801664355e-06, "loss": 0.0314, "step": 4993 }, { "epoch": 3.4596466920678903, "grad_norm": 0.6265416741371155, "learning_rate": 6.54368932038835e-06, "loss": 0.0311, "step": 4994 }, { "epoch": 3.4603394527190856, "grad_norm": 0.7412983179092407, "learning_rate": 6.542995839112345e-06, "loss": 0.0375, "step": 4995 }, { "epoch": 3.4610322133702804, "grad_norm": 0.6574375033378601, "learning_rate": 6.542302357836338e-06, "loss": 0.0373, "step": 4996 }, { "epoch": 3.4617249740214757, "grad_norm": 0.550150990486145, "learning_rate": 6.541608876560333e-06, "loss": 0.0311, "step": 4997 }, { "epoch": 3.4624177346726706, "grad_norm": 0.6278256773948669, "learning_rate": 6.540915395284327e-06, "loss": 0.0272, "step": 4998 }, { "epoch": 3.463110495323866, "grad_norm": 0.621241569519043, "learning_rate": 6.540221914008322e-06, "loss": 0.0335, "step": 4999 }, { "epoch": 3.4638032559750607, "grad_norm": 0.6454599499702454, "learning_rate": 6.539528432732317e-06, "loss": 0.029, "step": 5000 }, { "epoch": 3.4644960166262555, "grad_norm": 0.5408979058265686, "learning_rate": 6.538834951456311e-06, "loss": 0.0277, "step": 5001 }, { "epoch": 3.465188777277451, "grad_norm": 0.7437618374824524, "learning_rate": 6.538141470180306e-06, "loss": 0.031, "step": 5002 }, { "epoch": 3.4658815379286456, "grad_norm": 0.5505980849266052, "learning_rate": 6.5374479889042995e-06, "loss": 0.03, "step": 5003 }, { "epoch": 3.4665742985798405, "grad_norm": 0.5235286355018616, "learning_rate": 6.536754507628294e-06, "loss": 0.0236, "step": 5004 }, { "epoch": 3.4672670592310357, "grad_norm": 0.5948695540428162, "learning_rate": 6.536061026352289e-06, "loss": 0.0313, "step": 5005 }, { "epoch": 3.4679598198822306, "grad_norm": 0.5590401291847229, "learning_rate": 6.535367545076283e-06, "loss": 0.029, "step": 5006 }, { "epoch": 3.468652580533426, "grad_norm": 0.5125772356987, "learning_rate": 6.534674063800278e-06, "loss": 0.0256, "step": 5007 }, { "epoch": 3.4693453411846207, "grad_norm": 0.6227302551269531, "learning_rate": 6.533980582524272e-06, "loss": 0.0316, "step": 5008 }, { "epoch": 3.470038101835816, "grad_norm": 0.5919042229652405, "learning_rate": 6.5332871012482665e-06, "loss": 0.0293, "step": 5009 }, { "epoch": 3.470730862487011, "grad_norm": 0.5775042176246643, "learning_rate": 6.5325936199722614e-06, "loss": 0.0313, "step": 5010 }, { "epoch": 3.4714236231382056, "grad_norm": 0.5381078124046326, "learning_rate": 6.5319001386962555e-06, "loss": 0.0297, "step": 5011 }, { "epoch": 3.472116383789401, "grad_norm": 0.5170828700065613, "learning_rate": 6.5312066574202505e-06, "loss": 0.0318, "step": 5012 }, { "epoch": 3.4728091444405957, "grad_norm": 0.6526956558227539, "learning_rate": 6.5305131761442446e-06, "loss": 0.0333, "step": 5013 }, { "epoch": 3.4735019050917906, "grad_norm": 0.6767124533653259, "learning_rate": 6.5298196948682395e-06, "loss": 0.0355, "step": 5014 }, { "epoch": 3.474194665742986, "grad_norm": 0.5965400338172913, "learning_rate": 6.5291262135922336e-06, "loss": 0.0322, "step": 5015 }, { "epoch": 3.4748874263941807, "grad_norm": 0.5544615387916565, "learning_rate": 6.528432732316228e-06, "loss": 0.0251, "step": 5016 }, { "epoch": 3.475580187045376, "grad_norm": 0.636720597743988, "learning_rate": 6.527739251040223e-06, "loss": 0.0326, "step": 5017 }, { "epoch": 3.476272947696571, "grad_norm": 0.4987667500972748, "learning_rate": 6.527045769764217e-06, "loss": 0.0281, "step": 5018 }, { "epoch": 3.476965708347766, "grad_norm": 0.5134775042533875, "learning_rate": 6.526352288488212e-06, "loss": 0.0272, "step": 5019 }, { "epoch": 3.477658468998961, "grad_norm": 0.6622617840766907, "learning_rate": 6.5256588072122065e-06, "loss": 0.0332, "step": 5020 }, { "epoch": 3.4783512296501558, "grad_norm": 0.6579943895339966, "learning_rate": 6.5249653259362e-06, "loss": 0.0304, "step": 5021 }, { "epoch": 3.479043990301351, "grad_norm": 0.6182815432548523, "learning_rate": 6.524271844660195e-06, "loss": 0.0328, "step": 5022 }, { "epoch": 3.479736750952546, "grad_norm": 0.5382652878761292, "learning_rate": 6.523578363384189e-06, "loss": 0.0253, "step": 5023 }, { "epoch": 3.4804295116037407, "grad_norm": 0.7151105999946594, "learning_rate": 6.522884882108184e-06, "loss": 0.0303, "step": 5024 }, { "epoch": 3.481122272254936, "grad_norm": 0.5923858284950256, "learning_rate": 6.522191400832179e-06, "loss": 0.0251, "step": 5025 }, { "epoch": 3.481815032906131, "grad_norm": 0.5400237441062927, "learning_rate": 6.521497919556172e-06, "loss": 0.0305, "step": 5026 }, { "epoch": 3.482507793557326, "grad_norm": 0.5522944331169128, "learning_rate": 6.520804438280167e-06, "loss": 0.0262, "step": 5027 }, { "epoch": 3.483200554208521, "grad_norm": 0.7159655094146729, "learning_rate": 6.520110957004161e-06, "loss": 0.0268, "step": 5028 }, { "epoch": 3.483893314859716, "grad_norm": 0.7188911437988281, "learning_rate": 6.519417475728156e-06, "loss": 0.0395, "step": 5029 }, { "epoch": 3.484586075510911, "grad_norm": 0.5369138717651367, "learning_rate": 6.518723994452151e-06, "loss": 0.0319, "step": 5030 }, { "epoch": 3.485278836162106, "grad_norm": 0.6051187515258789, "learning_rate": 6.518030513176145e-06, "loss": 0.038, "step": 5031 }, { "epoch": 3.485971596813301, "grad_norm": 0.4840165376663208, "learning_rate": 6.51733703190014e-06, "loss": 0.022, "step": 5032 }, { "epoch": 3.486664357464496, "grad_norm": 0.5075309872627258, "learning_rate": 6.516643550624133e-06, "loss": 0.026, "step": 5033 }, { "epoch": 3.487357118115691, "grad_norm": 0.5605731010437012, "learning_rate": 6.515950069348128e-06, "loss": 0.0254, "step": 5034 }, { "epoch": 3.488049878766886, "grad_norm": 0.5532118082046509, "learning_rate": 6.515256588072123e-06, "loss": 0.0245, "step": 5035 }, { "epoch": 3.488742639418081, "grad_norm": 0.5283900499343872, "learning_rate": 6.514563106796117e-06, "loss": 0.0306, "step": 5036 }, { "epoch": 3.489435400069276, "grad_norm": 0.5738806128501892, "learning_rate": 6.513869625520112e-06, "loss": 0.028, "step": 5037 }, { "epoch": 3.490128160720471, "grad_norm": 0.45622918009757996, "learning_rate": 6.513176144244105e-06, "loss": 0.0211, "step": 5038 }, { "epoch": 3.4908209213716663, "grad_norm": 0.5849683284759521, "learning_rate": 6.5124826629681e-06, "loss": 0.0345, "step": 5039 }, { "epoch": 3.491513682022861, "grad_norm": 0.6652094721794128, "learning_rate": 6.511789181692095e-06, "loss": 0.0322, "step": 5040 }, { "epoch": 3.492206442674056, "grad_norm": 0.5022942423820496, "learning_rate": 6.511095700416089e-06, "loss": 0.0213, "step": 5041 }, { "epoch": 3.4928992033252513, "grad_norm": 0.5821471810340881, "learning_rate": 6.510402219140084e-06, "loss": 0.0325, "step": 5042 }, { "epoch": 3.493591963976446, "grad_norm": 0.5777978897094727, "learning_rate": 6.509708737864078e-06, "loss": 0.0303, "step": 5043 }, { "epoch": 3.494284724627641, "grad_norm": 0.5560289621353149, "learning_rate": 6.509015256588072e-06, "loss": 0.0324, "step": 5044 }, { "epoch": 3.4949774852788362, "grad_norm": 0.5134693384170532, "learning_rate": 6.508321775312067e-06, "loss": 0.0213, "step": 5045 }, { "epoch": 3.495670245930031, "grad_norm": 0.6379109025001526, "learning_rate": 6.507628294036061e-06, "loss": 0.0365, "step": 5046 }, { "epoch": 3.4963630065812263, "grad_norm": 0.5841686725616455, "learning_rate": 6.506934812760056e-06, "loss": 0.0234, "step": 5047 }, { "epoch": 3.497055767232421, "grad_norm": 0.5673247575759888, "learning_rate": 6.50624133148405e-06, "loss": 0.0202, "step": 5048 }, { "epoch": 3.4977485278836165, "grad_norm": 0.5946432948112488, "learning_rate": 6.505547850208045e-06, "loss": 0.0318, "step": 5049 }, { "epoch": 3.4984412885348113, "grad_norm": 0.5511200428009033, "learning_rate": 6.50485436893204e-06, "loss": 0.0251, "step": 5050 }, { "epoch": 3.499134049186006, "grad_norm": 0.5466020703315735, "learning_rate": 6.504160887656033e-06, "loss": 0.0239, "step": 5051 }, { "epoch": 3.4998268098372014, "grad_norm": 0.6413298845291138, "learning_rate": 6.503467406380028e-06, "loss": 0.0375, "step": 5052 }, { "epoch": 3.5005195704883962, "grad_norm": 0.6071054935455322, "learning_rate": 6.502773925104022e-06, "loss": 0.029, "step": 5053 }, { "epoch": 3.501212331139591, "grad_norm": 0.7107934355735779, "learning_rate": 6.502080443828017e-06, "loss": 0.0305, "step": 5054 }, { "epoch": 3.5019050917907864, "grad_norm": 0.6063798666000366, "learning_rate": 6.501386962552012e-06, "loss": 0.0284, "step": 5055 }, { "epoch": 3.502597852441981, "grad_norm": 0.6412544250488281, "learning_rate": 6.5006934812760055e-06, "loss": 0.0352, "step": 5056 }, { "epoch": 3.5032906130931765, "grad_norm": 0.6058118343353271, "learning_rate": 6.5000000000000004e-06, "loss": 0.0317, "step": 5057 }, { "epoch": 3.5039833737443713, "grad_norm": 0.6159127950668335, "learning_rate": 6.4993065187239945e-06, "loss": 0.0363, "step": 5058 }, { "epoch": 3.5046761343955666, "grad_norm": 0.5549063682556152, "learning_rate": 6.4986130374479895e-06, "loss": 0.031, "step": 5059 }, { "epoch": 3.5053688950467614, "grad_norm": 0.7032778859138489, "learning_rate": 6.497919556171984e-06, "loss": 0.0319, "step": 5060 }, { "epoch": 3.5060616556979562, "grad_norm": 0.6714448928833008, "learning_rate": 6.4972260748959785e-06, "loss": 0.0345, "step": 5061 }, { "epoch": 3.5067544163491515, "grad_norm": 0.46812206506729126, "learning_rate": 6.496532593619973e-06, "loss": 0.0268, "step": 5062 }, { "epoch": 3.5074471770003464, "grad_norm": 0.5488625764846802, "learning_rate": 6.495839112343967e-06, "loss": 0.0276, "step": 5063 }, { "epoch": 3.508139937651541, "grad_norm": 0.5239285826683044, "learning_rate": 6.495145631067962e-06, "loss": 0.0251, "step": 5064 }, { "epoch": 3.5088326983027365, "grad_norm": 0.5092358589172363, "learning_rate": 6.4944521497919565e-06, "loss": 0.0253, "step": 5065 }, { "epoch": 3.5095254589539313, "grad_norm": 0.7008700370788574, "learning_rate": 6.493758668515951e-06, "loss": 0.0242, "step": 5066 }, { "epoch": 3.5102182196051266, "grad_norm": 0.5645884871482849, "learning_rate": 6.4930651872399455e-06, "loss": 0.0311, "step": 5067 }, { "epoch": 3.5109109802563214, "grad_norm": 0.7841275930404663, "learning_rate": 6.492371705963939e-06, "loss": 0.0437, "step": 5068 }, { "epoch": 3.5116037409075167, "grad_norm": 0.5667563676834106, "learning_rate": 6.491678224687934e-06, "loss": 0.026, "step": 5069 }, { "epoch": 3.5122965015587115, "grad_norm": 0.6524698734283447, "learning_rate": 6.490984743411929e-06, "loss": 0.0303, "step": 5070 }, { "epoch": 3.5129892622099064, "grad_norm": 0.6123387217521667, "learning_rate": 6.490291262135923e-06, "loss": 0.036, "step": 5071 }, { "epoch": 3.5136820228611017, "grad_norm": 0.6954811811447144, "learning_rate": 6.489597780859918e-06, "loss": 0.0358, "step": 5072 }, { "epoch": 3.5143747835122965, "grad_norm": 0.5746607780456543, "learning_rate": 6.488904299583911e-06, "loss": 0.0387, "step": 5073 }, { "epoch": 3.5150675441634913, "grad_norm": 0.6515728831291199, "learning_rate": 6.488210818307906e-06, "loss": 0.0353, "step": 5074 }, { "epoch": 3.5157603048146866, "grad_norm": 0.6279569864273071, "learning_rate": 6.487517337031901e-06, "loss": 0.0329, "step": 5075 }, { "epoch": 3.5164530654658814, "grad_norm": 0.5640904307365417, "learning_rate": 6.486823855755895e-06, "loss": 0.0209, "step": 5076 }, { "epoch": 3.5171458261170767, "grad_norm": 0.5993984341621399, "learning_rate": 6.48613037447989e-06, "loss": 0.0286, "step": 5077 }, { "epoch": 3.5178385867682715, "grad_norm": 0.6607270240783691, "learning_rate": 6.485436893203884e-06, "loss": 0.0301, "step": 5078 }, { "epoch": 3.518531347419467, "grad_norm": 0.5850106477737427, "learning_rate": 6.484743411927879e-06, "loss": 0.0322, "step": 5079 }, { "epoch": 3.5192241080706617, "grad_norm": 0.6616483330726624, "learning_rate": 6.484049930651874e-06, "loss": 0.0288, "step": 5080 }, { "epoch": 3.5199168687218565, "grad_norm": 0.6212733387947083, "learning_rate": 6.483356449375867e-06, "loss": 0.029, "step": 5081 }, { "epoch": 3.5206096293730518, "grad_norm": 0.6463752388954163, "learning_rate": 6.482662968099862e-06, "loss": 0.0245, "step": 5082 }, { "epoch": 3.5213023900242466, "grad_norm": 0.6329712271690369, "learning_rate": 6.481969486823856e-06, "loss": 0.0407, "step": 5083 }, { "epoch": 3.5219951506754414, "grad_norm": 0.5336253643035889, "learning_rate": 6.481276005547851e-06, "loss": 0.0256, "step": 5084 }, { "epoch": 3.5226879113266367, "grad_norm": 0.6275019645690918, "learning_rate": 6.480582524271846e-06, "loss": 0.0322, "step": 5085 }, { "epoch": 3.5233806719778316, "grad_norm": 0.6023123860359192, "learning_rate": 6.479889042995839e-06, "loss": 0.0383, "step": 5086 }, { "epoch": 3.524073432629027, "grad_norm": 1.0357578992843628, "learning_rate": 6.479195561719834e-06, "loss": 0.0265, "step": 5087 }, { "epoch": 3.5247661932802217, "grad_norm": 0.7056729197502136, "learning_rate": 6.478502080443828e-06, "loss": 0.0429, "step": 5088 }, { "epoch": 3.525458953931417, "grad_norm": 0.56638503074646, "learning_rate": 6.477808599167823e-06, "loss": 0.0285, "step": 5089 }, { "epoch": 3.526151714582612, "grad_norm": 0.5246169567108154, "learning_rate": 6.477115117891818e-06, "loss": 0.028, "step": 5090 }, { "epoch": 3.5268444752338066, "grad_norm": 0.6372687220573425, "learning_rate": 6.476421636615812e-06, "loss": 0.0353, "step": 5091 }, { "epoch": 3.527537235885002, "grad_norm": 0.5155999064445496, "learning_rate": 6.475728155339806e-06, "loss": 0.0248, "step": 5092 }, { "epoch": 3.5282299965361967, "grad_norm": 0.544079601764679, "learning_rate": 6.4750346740638e-06, "loss": 0.0296, "step": 5093 }, { "epoch": 3.5289227571873916, "grad_norm": 0.5598334074020386, "learning_rate": 6.474341192787795e-06, "loss": 0.0277, "step": 5094 }, { "epoch": 3.529615517838587, "grad_norm": 0.7250769138336182, "learning_rate": 6.47364771151179e-06, "loss": 0.0437, "step": 5095 }, { "epoch": 3.5303082784897817, "grad_norm": 0.6074924468994141, "learning_rate": 6.472954230235784e-06, "loss": 0.0318, "step": 5096 }, { "epoch": 3.5310010391409765, "grad_norm": 0.5366249084472656, "learning_rate": 6.472260748959779e-06, "loss": 0.0277, "step": 5097 }, { "epoch": 3.531693799792172, "grad_norm": 0.4772739112377167, "learning_rate": 6.471567267683772e-06, "loss": 0.0208, "step": 5098 }, { "epoch": 3.532386560443367, "grad_norm": 0.6919244527816772, "learning_rate": 6.470873786407767e-06, "loss": 0.0311, "step": 5099 }, { "epoch": 3.533079321094562, "grad_norm": 0.62872713804245, "learning_rate": 6.470180305131762e-06, "loss": 0.0392, "step": 5100 }, { "epoch": 3.5337720817457567, "grad_norm": 0.5211807489395142, "learning_rate": 6.469486823855756e-06, "loss": 0.0223, "step": 5101 }, { "epoch": 3.534464842396952, "grad_norm": 0.647637665271759, "learning_rate": 6.468793342579751e-06, "loss": 0.0378, "step": 5102 }, { "epoch": 3.535157603048147, "grad_norm": 0.6397925019264221, "learning_rate": 6.4680998613037445e-06, "loss": 0.0343, "step": 5103 }, { "epoch": 3.5358503636993417, "grad_norm": 0.487280935049057, "learning_rate": 6.4674063800277394e-06, "loss": 0.026, "step": 5104 }, { "epoch": 3.536543124350537, "grad_norm": 0.5970995426177979, "learning_rate": 6.466712898751734e-06, "loss": 0.0341, "step": 5105 }, { "epoch": 3.537235885001732, "grad_norm": 0.6427799463272095, "learning_rate": 6.4660194174757285e-06, "loss": 0.0416, "step": 5106 }, { "epoch": 3.5379286456529266, "grad_norm": 0.8776400685310364, "learning_rate": 6.465325936199723e-06, "loss": 0.0409, "step": 5107 }, { "epoch": 3.538621406304122, "grad_norm": 0.5709391236305237, "learning_rate": 6.4646324549237175e-06, "loss": 0.0311, "step": 5108 }, { "epoch": 3.539314166955317, "grad_norm": 0.5883795022964478, "learning_rate": 6.463938973647712e-06, "loss": 0.0415, "step": 5109 }, { "epoch": 3.540006927606512, "grad_norm": 0.6908285021781921, "learning_rate": 6.463245492371707e-06, "loss": 0.0381, "step": 5110 }, { "epoch": 3.540699688257707, "grad_norm": 0.5694625973701477, "learning_rate": 6.462552011095701e-06, "loss": 0.0315, "step": 5111 }, { "epoch": 3.541392448908902, "grad_norm": 0.7304494976997375, "learning_rate": 6.4618585298196955e-06, "loss": 0.0406, "step": 5112 }, { "epoch": 3.542085209560097, "grad_norm": 0.5786203145980835, "learning_rate": 6.46116504854369e-06, "loss": 0.0264, "step": 5113 }, { "epoch": 3.542777970211292, "grad_norm": 0.5530243515968323, "learning_rate": 6.4604715672676845e-06, "loss": 0.0256, "step": 5114 }, { "epoch": 3.543470730862487, "grad_norm": 0.6862016916275024, "learning_rate": 6.4597780859916795e-06, "loss": 0.0346, "step": 5115 }, { "epoch": 3.544163491513682, "grad_norm": 0.6406251192092896, "learning_rate": 6.459084604715673e-06, "loss": 0.0357, "step": 5116 }, { "epoch": 3.5448562521648768, "grad_norm": 0.6029278039932251, "learning_rate": 6.458391123439668e-06, "loss": 0.0303, "step": 5117 }, { "epoch": 3.545549012816072, "grad_norm": 0.49682673811912537, "learning_rate": 6.457697642163662e-06, "loss": 0.0281, "step": 5118 }, { "epoch": 3.5462417734672673, "grad_norm": 0.8219375610351562, "learning_rate": 6.457004160887657e-06, "loss": 0.0295, "step": 5119 }, { "epoch": 3.546934534118462, "grad_norm": 0.6140983700752258, "learning_rate": 6.456310679611652e-06, "loss": 0.0266, "step": 5120 }, { "epoch": 3.547627294769657, "grad_norm": 0.6019349098205566, "learning_rate": 6.455617198335645e-06, "loss": 0.0299, "step": 5121 }, { "epoch": 3.5483200554208523, "grad_norm": 0.5619654059410095, "learning_rate": 6.45492371705964e-06, "loss": 0.0308, "step": 5122 }, { "epoch": 3.549012816072047, "grad_norm": 0.4877817928791046, "learning_rate": 6.454230235783634e-06, "loss": 0.0246, "step": 5123 }, { "epoch": 3.549705576723242, "grad_norm": 0.9568673372268677, "learning_rate": 6.453536754507629e-06, "loss": 0.0333, "step": 5124 }, { "epoch": 3.550398337374437, "grad_norm": 0.6768813133239746, "learning_rate": 6.452843273231624e-06, "loss": 0.0353, "step": 5125 }, { "epoch": 3.551091098025632, "grad_norm": 0.5718116760253906, "learning_rate": 6.452149791955618e-06, "loss": 0.0321, "step": 5126 }, { "epoch": 3.551783858676827, "grad_norm": 0.5333515405654907, "learning_rate": 6.451456310679613e-06, "loss": 0.0286, "step": 5127 }, { "epoch": 3.552476619328022, "grad_norm": 0.6497219204902649, "learning_rate": 6.450762829403606e-06, "loss": 0.0338, "step": 5128 }, { "epoch": 3.5531693799792174, "grad_norm": 0.6667235493659973, "learning_rate": 6.450069348127601e-06, "loss": 0.0318, "step": 5129 }, { "epoch": 3.5538621406304123, "grad_norm": 0.6585575342178345, "learning_rate": 6.449375866851596e-06, "loss": 0.0321, "step": 5130 }, { "epoch": 3.554554901281607, "grad_norm": 0.6151941418647766, "learning_rate": 6.44868238557559e-06, "loss": 0.0326, "step": 5131 }, { "epoch": 3.5552476619328024, "grad_norm": 0.5822476744651794, "learning_rate": 6.447988904299585e-06, "loss": 0.0228, "step": 5132 }, { "epoch": 3.5559404225839972, "grad_norm": 0.6461667418479919, "learning_rate": 6.447295423023578e-06, "loss": 0.035, "step": 5133 }, { "epoch": 3.556633183235192, "grad_norm": 0.6769730448722839, "learning_rate": 6.446601941747573e-06, "loss": 0.0365, "step": 5134 }, { "epoch": 3.5573259438863873, "grad_norm": 0.6281021237373352, "learning_rate": 6.445908460471568e-06, "loss": 0.0318, "step": 5135 }, { "epoch": 3.558018704537582, "grad_norm": 0.5587422847747803, "learning_rate": 6.445214979195562e-06, "loss": 0.0346, "step": 5136 }, { "epoch": 3.558711465188777, "grad_norm": 0.537922203540802, "learning_rate": 6.444521497919557e-06, "loss": 0.0313, "step": 5137 }, { "epoch": 3.5594042258399723, "grad_norm": 0.7479827404022217, "learning_rate": 6.443828016643551e-06, "loss": 0.037, "step": 5138 }, { "epoch": 3.5600969864911676, "grad_norm": 0.6676491498947144, "learning_rate": 6.443134535367546e-06, "loss": 0.0299, "step": 5139 }, { "epoch": 3.5607897471423624, "grad_norm": 0.5506122708320618, "learning_rate": 6.44244105409154e-06, "loss": 0.0364, "step": 5140 }, { "epoch": 3.5614825077935572, "grad_norm": 0.5065435767173767, "learning_rate": 6.441747572815534e-06, "loss": 0.027, "step": 5141 }, { "epoch": 3.5621752684447525, "grad_norm": 0.6939265131950378, "learning_rate": 6.441054091539529e-06, "loss": 0.0326, "step": 5142 }, { "epoch": 3.5628680290959474, "grad_norm": 0.7979405522346497, "learning_rate": 6.440360610263523e-06, "loss": 0.0424, "step": 5143 }, { "epoch": 3.563560789747142, "grad_norm": 0.6120886206626892, "learning_rate": 6.439667128987518e-06, "loss": 0.0376, "step": 5144 }, { "epoch": 3.5642535503983375, "grad_norm": 0.6197006106376648, "learning_rate": 6.438973647711513e-06, "loss": 0.0264, "step": 5145 }, { "epoch": 3.5649463110495323, "grad_norm": 0.6210958957672119, "learning_rate": 6.438280166435506e-06, "loss": 0.0305, "step": 5146 }, { "epoch": 3.565639071700727, "grad_norm": 0.6047160029411316, "learning_rate": 6.437586685159501e-06, "loss": 0.0295, "step": 5147 }, { "epoch": 3.5663318323519224, "grad_norm": 0.5486659407615662, "learning_rate": 6.436893203883495e-06, "loss": 0.0276, "step": 5148 }, { "epoch": 3.5670245930031177, "grad_norm": 0.6126028895378113, "learning_rate": 6.43619972260749e-06, "loss": 0.0346, "step": 5149 }, { "epoch": 3.5677173536543125, "grad_norm": 0.6695546507835388, "learning_rate": 6.435506241331485e-06, "loss": 0.0379, "step": 5150 }, { "epoch": 3.5684101143055074, "grad_norm": 0.5959687232971191, "learning_rate": 6.4348127600554784e-06, "loss": 0.0286, "step": 5151 }, { "epoch": 3.5691028749567026, "grad_norm": 0.6917016506195068, "learning_rate": 6.434119278779473e-06, "loss": 0.0338, "step": 5152 }, { "epoch": 3.5697956356078975, "grad_norm": 0.604941725730896, "learning_rate": 6.4334257975034675e-06, "loss": 0.0283, "step": 5153 }, { "epoch": 3.5704883962590923, "grad_norm": 0.4962579011917114, "learning_rate": 6.432732316227462e-06, "loss": 0.0212, "step": 5154 }, { "epoch": 3.5711811569102876, "grad_norm": 0.6287131309509277, "learning_rate": 6.432038834951457e-06, "loss": 0.0331, "step": 5155 }, { "epoch": 3.5718739175614824, "grad_norm": 0.5796381831169128, "learning_rate": 6.431345353675451e-06, "loss": 0.0317, "step": 5156 }, { "epoch": 3.5725666782126773, "grad_norm": 0.7629371881484985, "learning_rate": 6.430651872399446e-06, "loss": 0.0376, "step": 5157 }, { "epoch": 3.5732594388638725, "grad_norm": 0.6308226585388184, "learning_rate": 6.42995839112344e-06, "loss": 0.0373, "step": 5158 }, { "epoch": 3.573952199515068, "grad_norm": 0.5209526419639587, "learning_rate": 6.4292649098474345e-06, "loss": 0.0302, "step": 5159 }, { "epoch": 3.5746449601662627, "grad_norm": 0.6444221138954163, "learning_rate": 6.4285714285714295e-06, "loss": 0.0292, "step": 5160 }, { "epoch": 3.5753377208174575, "grad_norm": 0.4564352035522461, "learning_rate": 6.4278779472954235e-06, "loss": 0.0208, "step": 5161 }, { "epoch": 3.5760304814686528, "grad_norm": 0.5314273834228516, "learning_rate": 6.4271844660194185e-06, "loss": 0.0292, "step": 5162 }, { "epoch": 3.5767232421198476, "grad_norm": 0.5213531851768494, "learning_rate": 6.426490984743412e-06, "loss": 0.0261, "step": 5163 }, { "epoch": 3.5774160027710424, "grad_norm": 0.5768083333969116, "learning_rate": 6.425797503467407e-06, "loss": 0.028, "step": 5164 }, { "epoch": 3.5781087634222377, "grad_norm": 0.5932965278625488, "learning_rate": 6.425104022191402e-06, "loss": 0.026, "step": 5165 }, { "epoch": 3.5788015240734325, "grad_norm": 0.7689990401268005, "learning_rate": 6.424410540915396e-06, "loss": 0.0205, "step": 5166 }, { "epoch": 3.5794942847246274, "grad_norm": 0.5870887637138367, "learning_rate": 6.423717059639391e-06, "loss": 0.0384, "step": 5167 }, { "epoch": 3.5801870453758227, "grad_norm": 0.5823937058448792, "learning_rate": 6.423023578363385e-06, "loss": 0.0352, "step": 5168 }, { "epoch": 3.580879806027018, "grad_norm": 0.7018173933029175, "learning_rate": 6.422330097087379e-06, "loss": 0.0423, "step": 5169 }, { "epoch": 3.5815725666782128, "grad_norm": 0.545872151851654, "learning_rate": 6.421636615811374e-06, "loss": 0.0307, "step": 5170 }, { "epoch": 3.5822653273294076, "grad_norm": 0.5199849009513855, "learning_rate": 6.420943134535368e-06, "loss": 0.0292, "step": 5171 }, { "epoch": 3.582958087980603, "grad_norm": 0.41827356815338135, "learning_rate": 6.420249653259363e-06, "loss": 0.02, "step": 5172 }, { "epoch": 3.5836508486317977, "grad_norm": 0.6563012599945068, "learning_rate": 6.419556171983357e-06, "loss": 0.0389, "step": 5173 }, { "epoch": 3.5843436092829926, "grad_norm": 0.6314850449562073, "learning_rate": 6.418862690707352e-06, "loss": 0.0326, "step": 5174 }, { "epoch": 3.585036369934188, "grad_norm": 0.475079745054245, "learning_rate": 6.418169209431347e-06, "loss": 0.0208, "step": 5175 }, { "epoch": 3.5857291305853827, "grad_norm": 0.6002709269523621, "learning_rate": 6.41747572815534e-06, "loss": 0.0298, "step": 5176 }, { "epoch": 3.5864218912365775, "grad_norm": 0.5082271099090576, "learning_rate": 6.416782246879335e-06, "loss": 0.0263, "step": 5177 }, { "epoch": 3.587114651887773, "grad_norm": 0.584649384021759, "learning_rate": 6.416088765603329e-06, "loss": 0.0319, "step": 5178 }, { "epoch": 3.5878074125389676, "grad_norm": 0.6434041857719421, "learning_rate": 6.415395284327324e-06, "loss": 0.0393, "step": 5179 }, { "epoch": 3.588500173190163, "grad_norm": 0.6173291802406311, "learning_rate": 6.414701803051319e-06, "loss": 0.0365, "step": 5180 }, { "epoch": 3.5891929338413577, "grad_norm": 0.6330900192260742, "learning_rate": 6.414008321775312e-06, "loss": 0.0324, "step": 5181 }, { "epoch": 3.589885694492553, "grad_norm": 0.5519096255302429, "learning_rate": 6.413314840499307e-06, "loss": 0.0266, "step": 5182 }, { "epoch": 3.590578455143748, "grad_norm": 0.5743852853775024, "learning_rate": 6.412621359223301e-06, "loss": 0.0264, "step": 5183 }, { "epoch": 3.5912712157949427, "grad_norm": 0.6207646727561951, "learning_rate": 6.411927877947296e-06, "loss": 0.0263, "step": 5184 }, { "epoch": 3.591963976446138, "grad_norm": 0.5601102709770203, "learning_rate": 6.411234396671291e-06, "loss": 0.026, "step": 5185 }, { "epoch": 3.592656737097333, "grad_norm": 0.596768319606781, "learning_rate": 6.410540915395285e-06, "loss": 0.0319, "step": 5186 }, { "epoch": 3.5933494977485276, "grad_norm": 0.6190519332885742, "learning_rate": 6.40984743411928e-06, "loss": 0.0253, "step": 5187 }, { "epoch": 3.594042258399723, "grad_norm": 0.5937433838844299, "learning_rate": 6.409153952843273e-06, "loss": 0.0319, "step": 5188 }, { "epoch": 3.5947350190509177, "grad_norm": 0.5507180094718933, "learning_rate": 6.408460471567268e-06, "loss": 0.025, "step": 5189 }, { "epoch": 3.595427779702113, "grad_norm": 0.6053184866905212, "learning_rate": 6.407766990291263e-06, "loss": 0.0396, "step": 5190 }, { "epoch": 3.596120540353308, "grad_norm": 0.6637827157974243, "learning_rate": 6.407073509015257e-06, "loss": 0.0302, "step": 5191 }, { "epoch": 3.596813301004503, "grad_norm": 0.5112492442131042, "learning_rate": 6.406380027739252e-06, "loss": 0.0257, "step": 5192 }, { "epoch": 3.597506061655698, "grad_norm": 0.5539286136627197, "learning_rate": 6.405686546463245e-06, "loss": 0.0286, "step": 5193 }, { "epoch": 3.598198822306893, "grad_norm": 0.5003301501274109, "learning_rate": 6.40499306518724e-06, "loss": 0.0284, "step": 5194 }, { "epoch": 3.598891582958088, "grad_norm": 0.5797738432884216, "learning_rate": 6.404299583911235e-06, "loss": 0.0317, "step": 5195 }, { "epoch": 3.599584343609283, "grad_norm": 0.6324701905250549, "learning_rate": 6.403606102635229e-06, "loss": 0.0317, "step": 5196 }, { "epoch": 3.6002771042604778, "grad_norm": 0.7718259692192078, "learning_rate": 6.402912621359224e-06, "loss": 0.0436, "step": 5197 }, { "epoch": 3.600969864911673, "grad_norm": 0.5175343751907349, "learning_rate": 6.402219140083218e-06, "loss": 0.03, "step": 5198 }, { "epoch": 3.601662625562868, "grad_norm": 0.688150942325592, "learning_rate": 6.401525658807212e-06, "loss": 0.0352, "step": 5199 }, { "epoch": 3.602355386214063, "grad_norm": 0.4672299027442932, "learning_rate": 6.400832177531207e-06, "loss": 0.023, "step": 5200 }, { "epoch": 3.603048146865258, "grad_norm": 0.49436065554618835, "learning_rate": 6.400138696255201e-06, "loss": 0.022, "step": 5201 }, { "epoch": 3.6037409075164533, "grad_norm": 0.6367473006248474, "learning_rate": 6.399445214979196e-06, "loss": 0.03, "step": 5202 }, { "epoch": 3.604433668167648, "grad_norm": 0.5535686612129211, "learning_rate": 6.39875173370319e-06, "loss": 0.0346, "step": 5203 }, { "epoch": 3.605126428818843, "grad_norm": 0.5234419107437134, "learning_rate": 6.398058252427185e-06, "loss": 0.0279, "step": 5204 }, { "epoch": 3.605819189470038, "grad_norm": 0.5995169281959534, "learning_rate": 6.39736477115118e-06, "loss": 0.028, "step": 5205 }, { "epoch": 3.606511950121233, "grad_norm": 0.5009410381317139, "learning_rate": 6.3966712898751735e-06, "loss": 0.0253, "step": 5206 }, { "epoch": 3.607204710772428, "grad_norm": 0.6058676838874817, "learning_rate": 6.3959778085991685e-06, "loss": 0.0297, "step": 5207 }, { "epoch": 3.607897471423623, "grad_norm": 0.5171012282371521, "learning_rate": 6.3952843273231625e-06, "loss": 0.0264, "step": 5208 }, { "epoch": 3.608590232074818, "grad_norm": 0.7421143054962158, "learning_rate": 6.3945908460471575e-06, "loss": 0.0274, "step": 5209 }, { "epoch": 3.6092829927260133, "grad_norm": 0.5940254926681519, "learning_rate": 6.393897364771152e-06, "loss": 0.0492, "step": 5210 }, { "epoch": 3.609975753377208, "grad_norm": 0.5492020845413208, "learning_rate": 6.393203883495146e-06, "loss": 0.0324, "step": 5211 }, { "epoch": 3.6106685140284034, "grad_norm": 0.5443360209465027, "learning_rate": 6.392510402219141e-06, "loss": 0.0399, "step": 5212 }, { "epoch": 3.611361274679598, "grad_norm": 0.752368688583374, "learning_rate": 6.391816920943135e-06, "loss": 0.0441, "step": 5213 }, { "epoch": 3.612054035330793, "grad_norm": 0.6221478581428528, "learning_rate": 6.39112343966713e-06, "loss": 0.0338, "step": 5214 }, { "epoch": 3.6127467959819883, "grad_norm": 0.487557590007782, "learning_rate": 6.3904299583911245e-06, "loss": 0.0269, "step": 5215 }, { "epoch": 3.613439556633183, "grad_norm": 0.5465534329414368, "learning_rate": 6.389736477115119e-06, "loss": 0.0324, "step": 5216 }, { "epoch": 3.614132317284378, "grad_norm": 0.5025929808616638, "learning_rate": 6.3890429958391136e-06, "loss": 0.0267, "step": 5217 }, { "epoch": 3.6148250779355733, "grad_norm": 0.5270314812660217, "learning_rate": 6.388349514563107e-06, "loss": 0.0267, "step": 5218 }, { "epoch": 3.615517838586768, "grad_norm": 0.5299199223518372, "learning_rate": 6.387656033287102e-06, "loss": 0.025, "step": 5219 }, { "epoch": 3.6162105992379634, "grad_norm": 0.6264725923538208, "learning_rate": 6.386962552011097e-06, "loss": 0.0242, "step": 5220 }, { "epoch": 3.6169033598891582, "grad_norm": 0.5444220304489136, "learning_rate": 6.386269070735091e-06, "loss": 0.0275, "step": 5221 }, { "epoch": 3.6175961205403535, "grad_norm": 0.654484212398529, "learning_rate": 6.385575589459086e-06, "loss": 0.0392, "step": 5222 }, { "epoch": 3.6182888811915483, "grad_norm": 0.5375171303749084, "learning_rate": 6.384882108183079e-06, "loss": 0.0301, "step": 5223 }, { "epoch": 3.618981641842743, "grad_norm": 0.6169750690460205, "learning_rate": 6.384188626907074e-06, "loss": 0.0255, "step": 5224 }, { "epoch": 3.6196744024939385, "grad_norm": 0.579149067401886, "learning_rate": 6.383495145631069e-06, "loss": 0.0351, "step": 5225 }, { "epoch": 3.6203671631451333, "grad_norm": 0.8136575222015381, "learning_rate": 6.382801664355063e-06, "loss": 0.036, "step": 5226 }, { "epoch": 3.621059923796328, "grad_norm": 0.7540092468261719, "learning_rate": 6.382108183079058e-06, "loss": 0.0391, "step": 5227 }, { "epoch": 3.6217526844475234, "grad_norm": 0.5661916732788086, "learning_rate": 6.381414701803051e-06, "loss": 0.0262, "step": 5228 }, { "epoch": 3.6224454450987182, "grad_norm": 0.6469407081604004, "learning_rate": 6.380721220527046e-06, "loss": 0.0331, "step": 5229 }, { "epoch": 3.6231382057499135, "grad_norm": 0.6229873299598694, "learning_rate": 6.380027739251041e-06, "loss": 0.0283, "step": 5230 }, { "epoch": 3.6238309664011084, "grad_norm": 0.575975239276886, "learning_rate": 6.379334257975035e-06, "loss": 0.0346, "step": 5231 }, { "epoch": 3.6245237270523036, "grad_norm": 0.5156070590019226, "learning_rate": 6.37864077669903e-06, "loss": 0.0218, "step": 5232 }, { "epoch": 3.6252164877034985, "grad_norm": 0.6007578372955322, "learning_rate": 6.377947295423024e-06, "loss": 0.0266, "step": 5233 }, { "epoch": 3.6259092483546933, "grad_norm": 0.6745254397392273, "learning_rate": 6.377253814147019e-06, "loss": 0.0343, "step": 5234 }, { "epoch": 3.6266020090058886, "grad_norm": 0.601026177406311, "learning_rate": 6.376560332871014e-06, "loss": 0.0364, "step": 5235 }, { "epoch": 3.6272947696570834, "grad_norm": 0.5416752099990845, "learning_rate": 6.375866851595007e-06, "loss": 0.0275, "step": 5236 }, { "epoch": 3.6279875303082783, "grad_norm": 0.5874926447868347, "learning_rate": 6.375173370319002e-06, "loss": 0.0304, "step": 5237 }, { "epoch": 3.6286802909594735, "grad_norm": 0.5943888425827026, "learning_rate": 6.374479889042996e-06, "loss": 0.0278, "step": 5238 }, { "epoch": 3.6293730516106684, "grad_norm": 0.5624053478240967, "learning_rate": 6.373786407766991e-06, "loss": 0.028, "step": 5239 }, { "epoch": 3.6300658122618636, "grad_norm": 0.5878169536590576, "learning_rate": 6.373092926490986e-06, "loss": 0.0347, "step": 5240 }, { "epoch": 3.6307585729130585, "grad_norm": 0.5780265927314758, "learning_rate": 6.372399445214979e-06, "loss": 0.0352, "step": 5241 }, { "epoch": 3.6314513335642538, "grad_norm": 0.7032192349433899, "learning_rate": 6.371705963938974e-06, "loss": 0.0295, "step": 5242 }, { "epoch": 3.6321440942154486, "grad_norm": 0.6351820230484009, "learning_rate": 6.371012482662968e-06, "loss": 0.0336, "step": 5243 }, { "epoch": 3.6328368548666434, "grad_norm": 0.5924007296562195, "learning_rate": 6.370319001386963e-06, "loss": 0.0312, "step": 5244 }, { "epoch": 3.6335296155178387, "grad_norm": 0.6216744780540466, "learning_rate": 6.369625520110958e-06, "loss": 0.0327, "step": 5245 }, { "epoch": 3.6342223761690335, "grad_norm": 0.5291363596916199, "learning_rate": 6.368932038834952e-06, "loss": 0.0266, "step": 5246 }, { "epoch": 3.6349151368202284, "grad_norm": 0.628518283367157, "learning_rate": 6.368238557558946e-06, "loss": 0.0397, "step": 5247 }, { "epoch": 3.6356078974714237, "grad_norm": 0.5681959986686707, "learning_rate": 6.36754507628294e-06, "loss": 0.0274, "step": 5248 }, { "epoch": 3.6363006581226185, "grad_norm": 0.5964863896369934, "learning_rate": 6.366851595006935e-06, "loss": 0.0301, "step": 5249 }, { "epoch": 3.6369934187738138, "grad_norm": 0.5186253786087036, "learning_rate": 6.36615811373093e-06, "loss": 0.0273, "step": 5250 }, { "epoch": 3.6376861794250086, "grad_norm": 0.5899990797042847, "learning_rate": 6.365464632454924e-06, "loss": 0.0408, "step": 5251 }, { "epoch": 3.638378940076204, "grad_norm": 0.5952342748641968, "learning_rate": 6.364771151178919e-06, "loss": 0.0309, "step": 5252 }, { "epoch": 3.6390717007273987, "grad_norm": 0.5763419270515442, "learning_rate": 6.3640776699029125e-06, "loss": 0.0288, "step": 5253 }, { "epoch": 3.6397644613785936, "grad_norm": 0.6522453427314758, "learning_rate": 6.3633841886269075e-06, "loss": 0.0329, "step": 5254 }, { "epoch": 3.640457222029789, "grad_norm": 0.5152024626731873, "learning_rate": 6.362690707350902e-06, "loss": 0.0246, "step": 5255 }, { "epoch": 3.6411499826809837, "grad_norm": 0.6586542725563049, "learning_rate": 6.3619972260748965e-06, "loss": 0.034, "step": 5256 }, { "epoch": 3.6418427433321785, "grad_norm": 0.5849273800849915, "learning_rate": 6.361303744798891e-06, "loss": 0.033, "step": 5257 }, { "epoch": 3.642535503983374, "grad_norm": 0.6569483876228333, "learning_rate": 6.360610263522885e-06, "loss": 0.0356, "step": 5258 }, { "epoch": 3.6432282646345686, "grad_norm": 0.5631612539291382, "learning_rate": 6.35991678224688e-06, "loss": 0.0317, "step": 5259 }, { "epoch": 3.643921025285764, "grad_norm": 0.5431450009346008, "learning_rate": 6.3592233009708745e-06, "loss": 0.0265, "step": 5260 }, { "epoch": 3.6446137859369587, "grad_norm": 0.7423073649406433, "learning_rate": 6.358529819694869e-06, "loss": 0.0394, "step": 5261 }, { "epoch": 3.645306546588154, "grad_norm": 0.7692938446998596, "learning_rate": 6.3578363384188635e-06, "loss": 0.0282, "step": 5262 }, { "epoch": 3.645999307239349, "grad_norm": 0.6432677507400513, "learning_rate": 6.357142857142858e-06, "loss": 0.0289, "step": 5263 }, { "epoch": 3.6466920678905437, "grad_norm": 0.7043903470039368, "learning_rate": 6.3564493758668526e-06, "loss": 0.0382, "step": 5264 }, { "epoch": 3.647384828541739, "grad_norm": 0.5756590962409973, "learning_rate": 6.3557558945908475e-06, "loss": 0.0293, "step": 5265 }, { "epoch": 3.648077589192934, "grad_norm": 0.47525158524513245, "learning_rate": 6.355062413314841e-06, "loss": 0.0251, "step": 5266 }, { "epoch": 3.6487703498441286, "grad_norm": 0.6053153276443481, "learning_rate": 6.354368932038836e-06, "loss": 0.0301, "step": 5267 }, { "epoch": 3.649463110495324, "grad_norm": 0.7235785126686096, "learning_rate": 6.35367545076283e-06, "loss": 0.0298, "step": 5268 }, { "epoch": 3.6501558711465187, "grad_norm": 0.6563252210617065, "learning_rate": 6.352981969486825e-06, "loss": 0.0248, "step": 5269 }, { "epoch": 3.650848631797714, "grad_norm": 0.5949928164482117, "learning_rate": 6.35228848821082e-06, "loss": 0.0343, "step": 5270 }, { "epoch": 3.651541392448909, "grad_norm": 0.5927355289459229, "learning_rate": 6.351595006934813e-06, "loss": 0.024, "step": 5271 }, { "epoch": 3.652234153100104, "grad_norm": 0.7582287788391113, "learning_rate": 6.350901525658808e-06, "loss": 0.0479, "step": 5272 }, { "epoch": 3.652926913751299, "grad_norm": 0.5354639887809753, "learning_rate": 6.350208044382802e-06, "loss": 0.0247, "step": 5273 }, { "epoch": 3.653619674402494, "grad_norm": 0.548228919506073, "learning_rate": 6.349514563106797e-06, "loss": 0.0236, "step": 5274 }, { "epoch": 3.654312435053689, "grad_norm": 0.5116567015647888, "learning_rate": 6.348821081830792e-06, "loss": 0.0194, "step": 5275 }, { "epoch": 3.655005195704884, "grad_norm": 0.4989963471889496, "learning_rate": 6.348127600554785e-06, "loss": 0.0246, "step": 5276 }, { "epoch": 3.6556979563560787, "grad_norm": 0.6131188869476318, "learning_rate": 6.34743411927878e-06, "loss": 0.0275, "step": 5277 }, { "epoch": 3.656390717007274, "grad_norm": 0.5656293034553528, "learning_rate": 6.346740638002774e-06, "loss": 0.0253, "step": 5278 }, { "epoch": 3.657083477658469, "grad_norm": 0.6491434574127197, "learning_rate": 6.346047156726769e-06, "loss": 0.0323, "step": 5279 }, { "epoch": 3.657776238309664, "grad_norm": 0.6328729391098022, "learning_rate": 6.345353675450764e-06, "loss": 0.0336, "step": 5280 }, { "epoch": 3.658468998960859, "grad_norm": 0.5684226751327515, "learning_rate": 6.344660194174758e-06, "loss": 0.0307, "step": 5281 }, { "epoch": 3.6591617596120543, "grad_norm": 0.8439211249351501, "learning_rate": 6.343966712898753e-06, "loss": 0.0269, "step": 5282 }, { "epoch": 3.659854520263249, "grad_norm": 0.7445356249809265, "learning_rate": 6.343273231622746e-06, "loss": 0.022, "step": 5283 }, { "epoch": 3.660547280914444, "grad_norm": 0.6294088959693909, "learning_rate": 6.342579750346741e-06, "loss": 0.0424, "step": 5284 }, { "epoch": 3.661240041565639, "grad_norm": 0.6228764057159424, "learning_rate": 6.341886269070736e-06, "loss": 0.0316, "step": 5285 }, { "epoch": 3.661932802216834, "grad_norm": 0.5894211530685425, "learning_rate": 6.34119278779473e-06, "loss": 0.0377, "step": 5286 }, { "epoch": 3.662625562868029, "grad_norm": 0.6125890612602234, "learning_rate": 6.340499306518725e-06, "loss": 0.0321, "step": 5287 }, { "epoch": 3.663318323519224, "grad_norm": 0.5297964215278625, "learning_rate": 6.339805825242718e-06, "loss": 0.0252, "step": 5288 }, { "epoch": 3.664011084170419, "grad_norm": 0.5186530947685242, "learning_rate": 6.339112343966713e-06, "loss": 0.0262, "step": 5289 }, { "epoch": 3.6647038448216143, "grad_norm": 0.5094587206840515, "learning_rate": 6.338418862690708e-06, "loss": 0.0237, "step": 5290 }, { "epoch": 3.665396605472809, "grad_norm": 0.6024801731109619, "learning_rate": 6.337725381414702e-06, "loss": 0.0303, "step": 5291 }, { "epoch": 3.6660893661240044, "grad_norm": 0.5105265378952026, "learning_rate": 6.337031900138697e-06, "loss": 0.023, "step": 5292 }, { "epoch": 3.666782126775199, "grad_norm": 0.5699858665466309, "learning_rate": 6.336338418862691e-06, "loss": 0.0275, "step": 5293 }, { "epoch": 3.667474887426394, "grad_norm": 0.474332332611084, "learning_rate": 6.335644937586686e-06, "loss": 0.0217, "step": 5294 }, { "epoch": 3.6681676480775893, "grad_norm": 0.5792379379272461, "learning_rate": 6.33495145631068e-06, "loss": 0.0297, "step": 5295 }, { "epoch": 3.668860408728784, "grad_norm": 0.6319828629493713, "learning_rate": 6.334257975034674e-06, "loss": 0.0402, "step": 5296 }, { "epoch": 3.669553169379979, "grad_norm": 0.5288411378860474, "learning_rate": 6.333564493758669e-06, "loss": 0.0344, "step": 5297 }, { "epoch": 3.6702459300311743, "grad_norm": 0.48269525170326233, "learning_rate": 6.332871012482663e-06, "loss": 0.0209, "step": 5298 }, { "epoch": 3.670938690682369, "grad_norm": 0.6102100014686584, "learning_rate": 6.332177531206658e-06, "loss": 0.0266, "step": 5299 }, { "epoch": 3.6716314513335644, "grad_norm": 0.532383918762207, "learning_rate": 6.331484049930653e-06, "loss": 0.0235, "step": 5300 }, { "epoch": 3.6723242119847592, "grad_norm": 0.48872363567352295, "learning_rate": 6.3307905686546465e-06, "loss": 0.0203, "step": 5301 }, { "epoch": 3.6730169726359545, "grad_norm": 0.5926398634910583, "learning_rate": 6.330097087378641e-06, "loss": 0.0295, "step": 5302 }, { "epoch": 3.6737097332871493, "grad_norm": 0.525722324848175, "learning_rate": 6.3294036061026355e-06, "loss": 0.023, "step": 5303 }, { "epoch": 3.674402493938344, "grad_norm": 0.5855699777603149, "learning_rate": 6.32871012482663e-06, "loss": 0.0382, "step": 5304 }, { "epoch": 3.6750952545895395, "grad_norm": 0.6651132106781006, "learning_rate": 6.328016643550625e-06, "loss": 0.0303, "step": 5305 }, { "epoch": 3.6757880152407343, "grad_norm": 0.617127001285553, "learning_rate": 6.327323162274619e-06, "loss": 0.0302, "step": 5306 }, { "epoch": 3.676480775891929, "grad_norm": 0.5854039192199707, "learning_rate": 6.3266296809986135e-06, "loss": 0.0241, "step": 5307 }, { "epoch": 3.6771735365431244, "grad_norm": 0.559038519859314, "learning_rate": 6.325936199722608e-06, "loss": 0.028, "step": 5308 }, { "epoch": 3.6778662971943192, "grad_norm": 0.6301884055137634, "learning_rate": 6.3252427184466025e-06, "loss": 0.0325, "step": 5309 }, { "epoch": 3.6785590578455145, "grad_norm": 0.6372119784355164, "learning_rate": 6.3245492371705975e-06, "loss": 0.0301, "step": 5310 }, { "epoch": 3.6792518184967093, "grad_norm": 0.6511542201042175, "learning_rate": 6.3238557558945916e-06, "loss": 0.026, "step": 5311 }, { "epoch": 3.6799445791479046, "grad_norm": 0.67556232213974, "learning_rate": 6.3231622746185865e-06, "loss": 0.0337, "step": 5312 }, { "epoch": 3.6806373397990995, "grad_norm": 0.605854868888855, "learning_rate": 6.32246879334258e-06, "loss": 0.0328, "step": 5313 }, { "epoch": 3.6813301004502943, "grad_norm": 0.5693877935409546, "learning_rate": 6.321775312066575e-06, "loss": 0.0285, "step": 5314 }, { "epoch": 3.6820228611014896, "grad_norm": 0.6283695697784424, "learning_rate": 6.32108183079057e-06, "loss": 0.0318, "step": 5315 }, { "epoch": 3.6827156217526844, "grad_norm": 0.6185287833213806, "learning_rate": 6.320388349514564e-06, "loss": 0.0278, "step": 5316 }, { "epoch": 3.6834083824038792, "grad_norm": 0.5208211541175842, "learning_rate": 6.319694868238559e-06, "loss": 0.026, "step": 5317 }, { "epoch": 3.6841011430550745, "grad_norm": 0.5192283987998962, "learning_rate": 6.319001386962552e-06, "loss": 0.0299, "step": 5318 }, { "epoch": 3.6847939037062694, "grad_norm": 0.6171731352806091, "learning_rate": 6.318307905686547e-06, "loss": 0.031, "step": 5319 }, { "epoch": 3.6854866643574646, "grad_norm": 0.5296388864517212, "learning_rate": 6.317614424410542e-06, "loss": 0.0316, "step": 5320 }, { "epoch": 3.6861794250086595, "grad_norm": 0.6835871338844299, "learning_rate": 6.316920943134536e-06, "loss": 0.0355, "step": 5321 }, { "epoch": 3.6868721856598548, "grad_norm": 0.5253806710243225, "learning_rate": 6.316227461858531e-06, "loss": 0.0294, "step": 5322 }, { "epoch": 3.6875649463110496, "grad_norm": 0.5869537591934204, "learning_rate": 6.315533980582525e-06, "loss": 0.0274, "step": 5323 }, { "epoch": 3.6882577069622444, "grad_norm": 0.5435597896575928, "learning_rate": 6.314840499306519e-06, "loss": 0.0321, "step": 5324 }, { "epoch": 3.6889504676134397, "grad_norm": 0.5530980825424194, "learning_rate": 6.314147018030514e-06, "loss": 0.027, "step": 5325 }, { "epoch": 3.6896432282646345, "grad_norm": 0.5563802123069763, "learning_rate": 6.313453536754508e-06, "loss": 0.0292, "step": 5326 }, { "epoch": 3.6903359889158294, "grad_norm": 0.4938030540943146, "learning_rate": 6.312760055478503e-06, "loss": 0.0224, "step": 5327 }, { "epoch": 3.6910287495670246, "grad_norm": 0.5866602659225464, "learning_rate": 6.312066574202497e-06, "loss": 0.0284, "step": 5328 }, { "epoch": 3.6917215102182195, "grad_norm": 0.6909619569778442, "learning_rate": 6.311373092926492e-06, "loss": 0.0453, "step": 5329 }, { "epoch": 3.6924142708694148, "grad_norm": 0.6352144479751587, "learning_rate": 6.310679611650487e-06, "loss": 0.0344, "step": 5330 }, { "epoch": 3.6931070315206096, "grad_norm": 0.6577898859977722, "learning_rate": 6.30998613037448e-06, "loss": 0.0373, "step": 5331 }, { "epoch": 3.693799792171805, "grad_norm": 0.5675220489501953, "learning_rate": 6.309292649098475e-06, "loss": 0.0272, "step": 5332 }, { "epoch": 3.6944925528229997, "grad_norm": 0.702862560749054, "learning_rate": 6.308599167822469e-06, "loss": 0.0369, "step": 5333 }, { "epoch": 3.6951853134741945, "grad_norm": 0.7315636873245239, "learning_rate": 6.307905686546464e-06, "loss": 0.0381, "step": 5334 }, { "epoch": 3.69587807412539, "grad_norm": 0.6276235580444336, "learning_rate": 6.307212205270459e-06, "loss": 0.0262, "step": 5335 }, { "epoch": 3.6965708347765847, "grad_norm": 0.4969485402107239, "learning_rate": 6.306518723994452e-06, "loss": 0.0351, "step": 5336 }, { "epoch": 3.6972635954277795, "grad_norm": 0.6115286946296692, "learning_rate": 6.305825242718447e-06, "loss": 0.0372, "step": 5337 }, { "epoch": 3.6979563560789748, "grad_norm": 0.6383223533630371, "learning_rate": 6.305131761442441e-06, "loss": 0.0347, "step": 5338 }, { "epoch": 3.6986491167301696, "grad_norm": 0.6415778994560242, "learning_rate": 6.304438280166436e-06, "loss": 0.0291, "step": 5339 }, { "epoch": 3.699341877381365, "grad_norm": 0.593380331993103, "learning_rate": 6.303744798890431e-06, "loss": 0.033, "step": 5340 }, { "epoch": 3.7000346380325597, "grad_norm": 0.5095431208610535, "learning_rate": 6.303051317614425e-06, "loss": 0.0277, "step": 5341 }, { "epoch": 3.700727398683755, "grad_norm": 0.6358219981193542, "learning_rate": 6.30235783633842e-06, "loss": 0.0368, "step": 5342 }, { "epoch": 3.70142015933495, "grad_norm": 0.4579196572303772, "learning_rate": 6.301664355062413e-06, "loss": 0.0236, "step": 5343 }, { "epoch": 3.7021129199861447, "grad_norm": 0.6052901744842529, "learning_rate": 6.300970873786408e-06, "loss": 0.0378, "step": 5344 }, { "epoch": 3.70280568063734, "grad_norm": 0.5675000548362732, "learning_rate": 6.300277392510403e-06, "loss": 0.0292, "step": 5345 }, { "epoch": 3.703498441288535, "grad_norm": 0.5974627137184143, "learning_rate": 6.299583911234397e-06, "loss": 0.0304, "step": 5346 }, { "epoch": 3.7041912019397296, "grad_norm": 0.4792574942111969, "learning_rate": 6.298890429958392e-06, "loss": 0.0201, "step": 5347 }, { "epoch": 3.704883962590925, "grad_norm": 0.7189779877662659, "learning_rate": 6.2981969486823855e-06, "loss": 0.0322, "step": 5348 }, { "epoch": 3.7055767232421197, "grad_norm": 0.459058940410614, "learning_rate": 6.29750346740638e-06, "loss": 0.0204, "step": 5349 }, { "epoch": 3.706269483893315, "grad_norm": 0.633417546749115, "learning_rate": 6.296809986130375e-06, "loss": 0.0332, "step": 5350 }, { "epoch": 3.70696224454451, "grad_norm": 0.6857876181602478, "learning_rate": 6.296116504854369e-06, "loss": 0.0356, "step": 5351 }, { "epoch": 3.707655005195705, "grad_norm": 0.6905134916305542, "learning_rate": 6.295423023578364e-06, "loss": 0.027, "step": 5352 }, { "epoch": 3.7083477658469, "grad_norm": 0.7333101034164429, "learning_rate": 6.294729542302358e-06, "loss": 0.0356, "step": 5353 }, { "epoch": 3.709040526498095, "grad_norm": 0.6089735627174377, "learning_rate": 6.2940360610263525e-06, "loss": 0.0354, "step": 5354 }, { "epoch": 3.70973328714929, "grad_norm": 0.6550477743148804, "learning_rate": 6.2933425797503475e-06, "loss": 0.0431, "step": 5355 }, { "epoch": 3.710426047800485, "grad_norm": 0.5756028890609741, "learning_rate": 6.2926490984743415e-06, "loss": 0.0302, "step": 5356 }, { "epoch": 3.7111188084516797, "grad_norm": 0.5778351426124573, "learning_rate": 6.2919556171983365e-06, "loss": 0.0277, "step": 5357 }, { "epoch": 3.711811569102875, "grad_norm": 0.5539336204528809, "learning_rate": 6.2912621359223306e-06, "loss": 0.0241, "step": 5358 }, { "epoch": 3.71250432975407, "grad_norm": 0.7275177240371704, "learning_rate": 6.2905686546463255e-06, "loss": 0.0313, "step": 5359 }, { "epoch": 3.713197090405265, "grad_norm": 0.5739328861236572, "learning_rate": 6.2898751733703204e-06, "loss": 0.0349, "step": 5360 }, { "epoch": 3.71388985105646, "grad_norm": 0.7920843958854675, "learning_rate": 6.289181692094314e-06, "loss": 0.0356, "step": 5361 }, { "epoch": 3.7145826117076552, "grad_norm": 0.5790738463401794, "learning_rate": 6.288488210818309e-06, "loss": 0.0309, "step": 5362 }, { "epoch": 3.71527537235885, "grad_norm": 0.592981219291687, "learning_rate": 6.287794729542303e-06, "loss": 0.0317, "step": 5363 }, { "epoch": 3.715968133010045, "grad_norm": 0.5291422605514526, "learning_rate": 6.287101248266298e-06, "loss": 0.0258, "step": 5364 }, { "epoch": 3.71666089366124, "grad_norm": 0.6955481767654419, "learning_rate": 6.2864077669902926e-06, "loss": 0.0306, "step": 5365 }, { "epoch": 3.717353654312435, "grad_norm": 0.7937054634094238, "learning_rate": 6.285714285714286e-06, "loss": 0.0359, "step": 5366 }, { "epoch": 3.71804641496363, "grad_norm": 0.7813820838928223, "learning_rate": 6.285020804438281e-06, "loss": 0.0306, "step": 5367 }, { "epoch": 3.718739175614825, "grad_norm": 0.5447640419006348, "learning_rate": 6.284327323162275e-06, "loss": 0.0304, "step": 5368 }, { "epoch": 3.71943193626602, "grad_norm": 0.6013126373291016, "learning_rate": 6.28363384188627e-06, "loss": 0.0231, "step": 5369 }, { "epoch": 3.7201246969172153, "grad_norm": 0.6309574842453003, "learning_rate": 6.282940360610265e-06, "loss": 0.0348, "step": 5370 }, { "epoch": 3.72081745756841, "grad_norm": 0.5237972140312195, "learning_rate": 6.282246879334259e-06, "loss": 0.0301, "step": 5371 }, { "epoch": 3.7215102182196054, "grad_norm": 0.680164098739624, "learning_rate": 6.281553398058253e-06, "loss": 0.0352, "step": 5372 }, { "epoch": 3.7222029788708, "grad_norm": 0.5138393640518188, "learning_rate": 6.280859916782247e-06, "loss": 0.0282, "step": 5373 }, { "epoch": 3.722895739521995, "grad_norm": 0.6719475984573364, "learning_rate": 6.280166435506242e-06, "loss": 0.0426, "step": 5374 }, { "epoch": 3.7235885001731903, "grad_norm": 0.4255213737487793, "learning_rate": 6.279472954230237e-06, "loss": 0.0243, "step": 5375 }, { "epoch": 3.724281260824385, "grad_norm": 0.6353483200073242, "learning_rate": 6.278779472954231e-06, "loss": 0.0375, "step": 5376 }, { "epoch": 3.72497402147558, "grad_norm": 0.6046718955039978, "learning_rate": 6.278085991678226e-06, "loss": 0.0277, "step": 5377 }, { "epoch": 3.7256667821267753, "grad_norm": 0.6534799933433533, "learning_rate": 6.277392510402219e-06, "loss": 0.0332, "step": 5378 }, { "epoch": 3.72635954277797, "grad_norm": 0.5492955446243286, "learning_rate": 6.276699029126214e-06, "loss": 0.029, "step": 5379 }, { "epoch": 3.7270523034291654, "grad_norm": 0.4888821840286255, "learning_rate": 6.276005547850209e-06, "loss": 0.033, "step": 5380 }, { "epoch": 3.72774506408036, "grad_norm": 0.6340741515159607, "learning_rate": 6.275312066574203e-06, "loss": 0.0322, "step": 5381 }, { "epoch": 3.7284378247315555, "grad_norm": 0.5831493735313416, "learning_rate": 6.274618585298198e-06, "loss": 0.028, "step": 5382 }, { "epoch": 3.7291305853827503, "grad_norm": 0.6150385141372681, "learning_rate": 6.273925104022191e-06, "loss": 0.0385, "step": 5383 }, { "epoch": 3.729823346033945, "grad_norm": 0.5997127890586853, "learning_rate": 6.273231622746186e-06, "loss": 0.0287, "step": 5384 }, { "epoch": 3.7305161066851404, "grad_norm": 0.4969633221626282, "learning_rate": 6.272538141470181e-06, "loss": 0.0303, "step": 5385 }, { "epoch": 3.7312088673363353, "grad_norm": 0.6962485313415527, "learning_rate": 6.271844660194175e-06, "loss": 0.0288, "step": 5386 }, { "epoch": 3.73190162798753, "grad_norm": 0.708992600440979, "learning_rate": 6.27115117891817e-06, "loss": 0.052, "step": 5387 }, { "epoch": 3.7325943886387254, "grad_norm": 0.5180173516273499, "learning_rate": 6.270457697642164e-06, "loss": 0.0242, "step": 5388 }, { "epoch": 3.7332871492899202, "grad_norm": 0.6726405024528503, "learning_rate": 6.269764216366159e-06, "loss": 0.038, "step": 5389 }, { "epoch": 3.7339799099411155, "grad_norm": 0.6243935823440552, "learning_rate": 6.269070735090154e-06, "loss": 0.0417, "step": 5390 }, { "epoch": 3.7346726705923103, "grad_norm": 0.5270748734474182, "learning_rate": 6.268377253814147e-06, "loss": 0.0264, "step": 5391 }, { "epoch": 3.7353654312435056, "grad_norm": 0.674968421459198, "learning_rate": 6.267683772538142e-06, "loss": 0.0388, "step": 5392 }, { "epoch": 3.7360581918947005, "grad_norm": 0.6683635711669922, "learning_rate": 6.266990291262136e-06, "loss": 0.038, "step": 5393 }, { "epoch": 3.7367509525458953, "grad_norm": 0.5001316070556641, "learning_rate": 6.266296809986131e-06, "loss": 0.0227, "step": 5394 }, { "epoch": 3.7374437131970906, "grad_norm": 0.6227713227272034, "learning_rate": 6.265603328710126e-06, "loss": 0.0339, "step": 5395 }, { "epoch": 3.7381364738482854, "grad_norm": 0.5813007950782776, "learning_rate": 6.264909847434119e-06, "loss": 0.0345, "step": 5396 }, { "epoch": 3.7388292344994802, "grad_norm": 0.5505149960517883, "learning_rate": 6.264216366158114e-06, "loss": 0.0266, "step": 5397 }, { "epoch": 3.7395219951506755, "grad_norm": 0.5475566983222961, "learning_rate": 6.263522884882108e-06, "loss": 0.0258, "step": 5398 }, { "epoch": 3.7402147558018703, "grad_norm": 0.5670726895332336, "learning_rate": 6.262829403606103e-06, "loss": 0.0259, "step": 5399 }, { "epoch": 3.7409075164530656, "grad_norm": 0.6908154487609863, "learning_rate": 6.262135922330098e-06, "loss": 0.035, "step": 5400 }, { "epoch": 3.7416002771042605, "grad_norm": 0.630292534828186, "learning_rate": 6.2614424410540915e-06, "loss": 0.033, "step": 5401 }, { "epoch": 3.7422930377554557, "grad_norm": 0.7584119439125061, "learning_rate": 6.2607489597780865e-06, "loss": 0.0386, "step": 5402 }, { "epoch": 3.7429857984066506, "grad_norm": 0.7879133224487305, "learning_rate": 6.2600554785020805e-06, "loss": 0.0407, "step": 5403 }, { "epoch": 3.7436785590578454, "grad_norm": 0.6253929734230042, "learning_rate": 6.2593619972260755e-06, "loss": 0.028, "step": 5404 }, { "epoch": 3.7443713197090407, "grad_norm": 0.6191489696502686, "learning_rate": 6.25866851595007e-06, "loss": 0.0308, "step": 5405 }, { "epoch": 3.7450640803602355, "grad_norm": 0.5253886580467224, "learning_rate": 6.2579750346740645e-06, "loss": 0.0262, "step": 5406 }, { "epoch": 3.7457568410114304, "grad_norm": 0.4761843681335449, "learning_rate": 6.2572815533980594e-06, "loss": 0.0253, "step": 5407 }, { "epoch": 3.7464496016626256, "grad_norm": 0.6056466698646545, "learning_rate": 6.256588072122053e-06, "loss": 0.0271, "step": 5408 }, { "epoch": 3.7471423623138205, "grad_norm": 0.6499962210655212, "learning_rate": 6.255894590846048e-06, "loss": 0.0358, "step": 5409 }, { "epoch": 3.7478351229650158, "grad_norm": 0.5838296413421631, "learning_rate": 6.2552011095700425e-06, "loss": 0.0257, "step": 5410 }, { "epoch": 3.7485278836162106, "grad_norm": 0.4950014352798462, "learning_rate": 6.254507628294037e-06, "loss": 0.0266, "step": 5411 }, { "epoch": 3.749220644267406, "grad_norm": 0.8059121370315552, "learning_rate": 6.2538141470180316e-06, "loss": 0.0445, "step": 5412 }, { "epoch": 3.7499134049186007, "grad_norm": 0.5892294645309448, "learning_rate": 6.253120665742025e-06, "loss": 0.035, "step": 5413 }, { "epoch": 3.7506061655697955, "grad_norm": 0.5557952523231506, "learning_rate": 6.25242718446602e-06, "loss": 0.0301, "step": 5414 }, { "epoch": 3.751298926220991, "grad_norm": 0.515476644039154, "learning_rate": 6.251733703190015e-06, "loss": 0.0289, "step": 5415 }, { "epoch": 3.7519916868721856, "grad_norm": 0.7727710604667664, "learning_rate": 6.251040221914009e-06, "loss": 0.0333, "step": 5416 }, { "epoch": 3.7526844475233805, "grad_norm": 0.6355763077735901, "learning_rate": 6.250346740638004e-06, "loss": 0.0342, "step": 5417 }, { "epoch": 3.7533772081745758, "grad_norm": 0.5792461037635803, "learning_rate": 6.249653259361998e-06, "loss": 0.0362, "step": 5418 }, { "epoch": 3.7540699688257706, "grad_norm": 0.7207496166229248, "learning_rate": 6.248959778085993e-06, "loss": 0.0407, "step": 5419 }, { "epoch": 3.7547627294769654, "grad_norm": 0.7004422545433044, "learning_rate": 6.248266296809986e-06, "loss": 0.0352, "step": 5420 }, { "epoch": 3.7554554901281607, "grad_norm": 0.7354230284690857, "learning_rate": 6.247572815533981e-06, "loss": 0.0365, "step": 5421 }, { "epoch": 3.756148250779356, "grad_norm": 0.5657079815864563, "learning_rate": 6.246879334257976e-06, "loss": 0.0211, "step": 5422 }, { "epoch": 3.756841011430551, "grad_norm": 0.5205053687095642, "learning_rate": 6.24618585298197e-06, "loss": 0.0214, "step": 5423 }, { "epoch": 3.7575337720817457, "grad_norm": 0.5499959588050842, "learning_rate": 6.245492371705965e-06, "loss": 0.0276, "step": 5424 }, { "epoch": 3.758226532732941, "grad_norm": 0.7697461843490601, "learning_rate": 6.244798890429958e-06, "loss": 0.0379, "step": 5425 }, { "epoch": 3.7589192933841358, "grad_norm": 0.6891419291496277, "learning_rate": 6.244105409153953e-06, "loss": 0.0324, "step": 5426 }, { "epoch": 3.7596120540353306, "grad_norm": 0.5204426050186157, "learning_rate": 6.243411927877948e-06, "loss": 0.0235, "step": 5427 }, { "epoch": 3.760304814686526, "grad_norm": 0.6464238166809082, "learning_rate": 6.242718446601942e-06, "loss": 0.0334, "step": 5428 }, { "epoch": 3.7609975753377207, "grad_norm": 0.6387354731559753, "learning_rate": 6.242024965325937e-06, "loss": 0.0402, "step": 5429 }, { "epoch": 3.7616903359889156, "grad_norm": 0.5376962423324585, "learning_rate": 6.24133148404993e-06, "loss": 0.0261, "step": 5430 }, { "epoch": 3.762383096640111, "grad_norm": 0.6329233050346375, "learning_rate": 6.240638002773925e-06, "loss": 0.0304, "step": 5431 }, { "epoch": 3.763075857291306, "grad_norm": 0.751514196395874, "learning_rate": 6.23994452149792e-06, "loss": 0.0323, "step": 5432 }, { "epoch": 3.763768617942501, "grad_norm": 0.6077172756195068, "learning_rate": 6.239251040221914e-06, "loss": 0.0276, "step": 5433 }, { "epoch": 3.764461378593696, "grad_norm": 0.5049221515655518, "learning_rate": 6.238557558945909e-06, "loss": 0.0251, "step": 5434 }, { "epoch": 3.765154139244891, "grad_norm": 0.6007904410362244, "learning_rate": 6.237864077669903e-06, "loss": 0.0291, "step": 5435 }, { "epoch": 3.765846899896086, "grad_norm": 0.628989040851593, "learning_rate": 6.237170596393898e-06, "loss": 0.0315, "step": 5436 }, { "epoch": 3.7665396605472807, "grad_norm": 0.6191542744636536, "learning_rate": 6.236477115117893e-06, "loss": 0.0352, "step": 5437 }, { "epoch": 3.767232421198476, "grad_norm": 0.5907944440841675, "learning_rate": 6.235783633841886e-06, "loss": 0.0325, "step": 5438 }, { "epoch": 3.767925181849671, "grad_norm": 0.6437923908233643, "learning_rate": 6.235090152565881e-06, "loss": 0.031, "step": 5439 }, { "epoch": 3.7686179425008657, "grad_norm": 0.6748537421226501, "learning_rate": 6.234396671289875e-06, "loss": 0.0264, "step": 5440 }, { "epoch": 3.769310703152061, "grad_norm": 0.7189391851425171, "learning_rate": 6.23370319001387e-06, "loss": 0.032, "step": 5441 }, { "epoch": 3.7700034638032562, "grad_norm": 0.6138607263565063, "learning_rate": 6.233009708737865e-06, "loss": 0.0305, "step": 5442 }, { "epoch": 3.770696224454451, "grad_norm": 0.546404242515564, "learning_rate": 6.232316227461858e-06, "loss": 0.0293, "step": 5443 }, { "epoch": 3.771388985105646, "grad_norm": 0.6265897750854492, "learning_rate": 6.231622746185853e-06, "loss": 0.0261, "step": 5444 }, { "epoch": 3.772081745756841, "grad_norm": 0.667362630367279, "learning_rate": 6.230929264909847e-06, "loss": 0.0363, "step": 5445 }, { "epoch": 3.772774506408036, "grad_norm": 1.0492887496948242, "learning_rate": 6.230235783633842e-06, "loss": 0.0289, "step": 5446 }, { "epoch": 3.773467267059231, "grad_norm": 0.585996687412262, "learning_rate": 6.229542302357837e-06, "loss": 0.0281, "step": 5447 }, { "epoch": 3.774160027710426, "grad_norm": 0.6105867624282837, "learning_rate": 6.228848821081831e-06, "loss": 0.0269, "step": 5448 }, { "epoch": 3.774852788361621, "grad_norm": 0.5522729158401489, "learning_rate": 6.2281553398058255e-06, "loss": 0.0272, "step": 5449 }, { "epoch": 3.775545549012816, "grad_norm": 0.6558374762535095, "learning_rate": 6.2274618585298195e-06, "loss": 0.0423, "step": 5450 }, { "epoch": 3.776238309664011, "grad_norm": 0.7256389856338501, "learning_rate": 6.2267683772538145e-06, "loss": 0.0315, "step": 5451 }, { "epoch": 3.7769310703152064, "grad_norm": 0.5794988870620728, "learning_rate": 6.226074895977809e-06, "loss": 0.022, "step": 5452 }, { "epoch": 3.777623830966401, "grad_norm": 0.8709037899971008, "learning_rate": 6.2253814147018035e-06, "loss": 0.0367, "step": 5453 }, { "epoch": 3.778316591617596, "grad_norm": 0.5693874955177307, "learning_rate": 6.2246879334257984e-06, "loss": 0.0297, "step": 5454 }, { "epoch": 3.7790093522687913, "grad_norm": 0.7096821665763855, "learning_rate": 6.223994452149792e-06, "loss": 0.0353, "step": 5455 }, { "epoch": 3.779702112919986, "grad_norm": 0.6339651942253113, "learning_rate": 6.223300970873787e-06, "loss": 0.0359, "step": 5456 }, { "epoch": 3.780394873571181, "grad_norm": 0.6035611629486084, "learning_rate": 6.2226074895977815e-06, "loss": 0.0243, "step": 5457 }, { "epoch": 3.7810876342223763, "grad_norm": 0.6057316660881042, "learning_rate": 6.221914008321776e-06, "loss": 0.0312, "step": 5458 }, { "epoch": 3.781780394873571, "grad_norm": 0.6637762784957886, "learning_rate": 6.2212205270457706e-06, "loss": 0.0245, "step": 5459 }, { "epoch": 3.782473155524766, "grad_norm": 0.630608081817627, "learning_rate": 6.220527045769764e-06, "loss": 0.0315, "step": 5460 }, { "epoch": 3.783165916175961, "grad_norm": 0.8728220462799072, "learning_rate": 6.219833564493759e-06, "loss": 0.0419, "step": 5461 }, { "epoch": 3.7838586768271565, "grad_norm": 0.6907286643981934, "learning_rate": 6.219140083217754e-06, "loss": 0.0333, "step": 5462 }, { "epoch": 3.7845514374783513, "grad_norm": 0.6949223875999451, "learning_rate": 6.218446601941748e-06, "loss": 0.0442, "step": 5463 }, { "epoch": 3.785244198129546, "grad_norm": 0.5685586333274841, "learning_rate": 6.217753120665743e-06, "loss": 0.0272, "step": 5464 }, { "epoch": 3.7859369587807414, "grad_norm": 0.6025075316429138, "learning_rate": 6.217059639389737e-06, "loss": 0.0313, "step": 5465 }, { "epoch": 3.7866297194319363, "grad_norm": 0.5943772792816162, "learning_rate": 6.216366158113732e-06, "loss": 0.0309, "step": 5466 }, { "epoch": 3.787322480083131, "grad_norm": 0.6488465666770935, "learning_rate": 6.215672676837727e-06, "loss": 0.0302, "step": 5467 }, { "epoch": 3.7880152407343264, "grad_norm": 0.6524391770362854, "learning_rate": 6.21497919556172e-06, "loss": 0.0335, "step": 5468 }, { "epoch": 3.788708001385521, "grad_norm": 0.5891224145889282, "learning_rate": 6.214285714285715e-06, "loss": 0.0315, "step": 5469 }, { "epoch": 3.789400762036716, "grad_norm": 0.5956622958183289, "learning_rate": 6.213592233009709e-06, "loss": 0.0355, "step": 5470 }, { "epoch": 3.7900935226879113, "grad_norm": 0.587727427482605, "learning_rate": 6.212898751733704e-06, "loss": 0.0316, "step": 5471 }, { "epoch": 3.7907862833391066, "grad_norm": 0.6151914000511169, "learning_rate": 6.212205270457699e-06, "loss": 0.0283, "step": 5472 }, { "epoch": 3.7914790439903014, "grad_norm": 0.6343703866004944, "learning_rate": 6.211511789181692e-06, "loss": 0.037, "step": 5473 }, { "epoch": 3.7921718046414963, "grad_norm": 0.6419991254806519, "learning_rate": 6.210818307905687e-06, "loss": 0.0381, "step": 5474 }, { "epoch": 3.7928645652926916, "grad_norm": 0.5683888792991638, "learning_rate": 6.210124826629681e-06, "loss": 0.0248, "step": 5475 }, { "epoch": 3.7935573259438864, "grad_norm": 0.5710171461105347, "learning_rate": 6.209431345353676e-06, "loss": 0.0286, "step": 5476 }, { "epoch": 3.7942500865950812, "grad_norm": 0.7465535402297974, "learning_rate": 6.208737864077671e-06, "loss": 0.0306, "step": 5477 }, { "epoch": 3.7949428472462765, "grad_norm": 0.5855188965797424, "learning_rate": 6.208044382801665e-06, "loss": 0.0278, "step": 5478 }, { "epoch": 3.7956356078974713, "grad_norm": 0.6260008215904236, "learning_rate": 6.207350901525659e-06, "loss": 0.0298, "step": 5479 }, { "epoch": 3.796328368548666, "grad_norm": 0.6134366393089294, "learning_rate": 6.206657420249653e-06, "loss": 0.0353, "step": 5480 }, { "epoch": 3.7970211291998615, "grad_norm": 0.884404182434082, "learning_rate": 6.205963938973648e-06, "loss": 0.0332, "step": 5481 }, { "epoch": 3.7977138898510567, "grad_norm": 0.659879744052887, "learning_rate": 6.205270457697643e-06, "loss": 0.0392, "step": 5482 }, { "epoch": 3.7984066505022516, "grad_norm": 0.6756446361541748, "learning_rate": 6.204576976421637e-06, "loss": 0.0393, "step": 5483 }, { "epoch": 3.7990994111534464, "grad_norm": 0.6037367582321167, "learning_rate": 6.203883495145632e-06, "loss": 0.0301, "step": 5484 }, { "epoch": 3.7997921718046417, "grad_norm": 0.6975623965263367, "learning_rate": 6.203190013869625e-06, "loss": 0.0311, "step": 5485 }, { "epoch": 3.8004849324558365, "grad_norm": 0.5865778923034668, "learning_rate": 6.20249653259362e-06, "loss": 0.0378, "step": 5486 }, { "epoch": 3.8011776931070314, "grad_norm": 0.6444191336631775, "learning_rate": 6.201803051317615e-06, "loss": 0.0267, "step": 5487 }, { "epoch": 3.8018704537582266, "grad_norm": 0.602146327495575, "learning_rate": 6.201109570041609e-06, "loss": 0.0319, "step": 5488 }, { "epoch": 3.8025632144094215, "grad_norm": 0.5784410238265991, "learning_rate": 6.200416088765604e-06, "loss": 0.0253, "step": 5489 }, { "epoch": 3.8032559750606163, "grad_norm": 0.5727723836898804, "learning_rate": 6.199722607489597e-06, "loss": 0.029, "step": 5490 }, { "epoch": 3.8039487357118116, "grad_norm": 0.635518491268158, "learning_rate": 6.199029126213592e-06, "loss": 0.0314, "step": 5491 }, { "epoch": 3.804641496363007, "grad_norm": 0.6199983954429626, "learning_rate": 6.198335644937587e-06, "loss": 0.0227, "step": 5492 }, { "epoch": 3.8053342570142017, "grad_norm": 0.6068674921989441, "learning_rate": 6.197642163661581e-06, "loss": 0.0287, "step": 5493 }, { "epoch": 3.8060270176653965, "grad_norm": 0.5692973732948303, "learning_rate": 6.196948682385576e-06, "loss": 0.029, "step": 5494 }, { "epoch": 3.806719778316592, "grad_norm": 0.553572416305542, "learning_rate": 6.19625520110957e-06, "loss": 0.0331, "step": 5495 }, { "epoch": 3.8074125389677866, "grad_norm": 0.6230151653289795, "learning_rate": 6.195561719833565e-06, "loss": 0.0354, "step": 5496 }, { "epoch": 3.8081052996189815, "grad_norm": 0.6320762634277344, "learning_rate": 6.194868238557559e-06, "loss": 0.0334, "step": 5497 }, { "epoch": 3.8087980602701768, "grad_norm": 0.6541516184806824, "learning_rate": 6.1941747572815535e-06, "loss": 0.029, "step": 5498 }, { "epoch": 3.8094908209213716, "grad_norm": 0.6015684604644775, "learning_rate": 6.193481276005548e-06, "loss": 0.0341, "step": 5499 }, { "epoch": 3.8101835815725664, "grad_norm": 0.6116700768470764, "learning_rate": 6.1927877947295425e-06, "loss": 0.0248, "step": 5500 }, { "epoch": 3.8108763422237617, "grad_norm": 0.6509401798248291, "learning_rate": 6.1920943134535374e-06, "loss": 0.0274, "step": 5501 }, { "epoch": 3.8115691028749565, "grad_norm": 0.4841687083244324, "learning_rate": 6.191400832177532e-06, "loss": 0.0245, "step": 5502 }, { "epoch": 3.812261863526152, "grad_norm": 0.44500479102134705, "learning_rate": 6.190707350901526e-06, "loss": 0.02, "step": 5503 }, { "epoch": 3.8129546241773467, "grad_norm": 0.5654506087303162, "learning_rate": 6.1900138696255205e-06, "loss": 0.0316, "step": 5504 }, { "epoch": 3.813647384828542, "grad_norm": 0.7147808074951172, "learning_rate": 6.189320388349515e-06, "loss": 0.0391, "step": 5505 }, { "epoch": 3.8143401454797368, "grad_norm": 0.6720602512359619, "learning_rate": 6.1886269070735096e-06, "loss": 0.0313, "step": 5506 }, { "epoch": 3.8150329061309316, "grad_norm": 0.7915608882904053, "learning_rate": 6.1879334257975045e-06, "loss": 0.0349, "step": 5507 }, { "epoch": 3.815725666782127, "grad_norm": 0.6050640940666199, "learning_rate": 6.187239944521498e-06, "loss": 0.0361, "step": 5508 }, { "epoch": 3.8164184274333217, "grad_norm": 0.7607973217964172, "learning_rate": 6.186546463245493e-06, "loss": 0.0362, "step": 5509 }, { "epoch": 3.8171111880845165, "grad_norm": 0.6928087472915649, "learning_rate": 6.185852981969487e-06, "loss": 0.0351, "step": 5510 }, { "epoch": 3.817803948735712, "grad_norm": 0.570793867111206, "learning_rate": 6.185159500693482e-06, "loss": 0.0358, "step": 5511 }, { "epoch": 3.8184967093869067, "grad_norm": 0.6071538329124451, "learning_rate": 6.184466019417477e-06, "loss": 0.0263, "step": 5512 }, { "epoch": 3.819189470038102, "grad_norm": 0.6508318781852722, "learning_rate": 6.183772538141471e-06, "loss": 0.0336, "step": 5513 }, { "epoch": 3.8198822306892968, "grad_norm": 0.6919434070587158, "learning_rate": 6.183079056865466e-06, "loss": 0.0317, "step": 5514 }, { "epoch": 3.820574991340492, "grad_norm": 0.7077426314353943, "learning_rate": 6.182385575589459e-06, "loss": 0.0391, "step": 5515 }, { "epoch": 3.821267751991687, "grad_norm": 0.6071457266807556, "learning_rate": 6.181692094313454e-06, "loss": 0.0428, "step": 5516 }, { "epoch": 3.8219605126428817, "grad_norm": 0.5197275876998901, "learning_rate": 6.180998613037449e-06, "loss": 0.0275, "step": 5517 }, { "epoch": 3.822653273294077, "grad_norm": 0.6566160917282104, "learning_rate": 6.180305131761443e-06, "loss": 0.0426, "step": 5518 }, { "epoch": 3.823346033945272, "grad_norm": 0.5395252704620361, "learning_rate": 6.179611650485438e-06, "loss": 0.026, "step": 5519 }, { "epoch": 3.8240387945964667, "grad_norm": 0.8134343028068542, "learning_rate": 6.178918169209431e-06, "loss": 0.0335, "step": 5520 }, { "epoch": 3.824731555247662, "grad_norm": 0.5744146108627319, "learning_rate": 6.178224687933426e-06, "loss": 0.03, "step": 5521 }, { "epoch": 3.825424315898857, "grad_norm": 0.6129289269447327, "learning_rate": 6.177531206657421e-06, "loss": 0.0476, "step": 5522 }, { "epoch": 3.826117076550052, "grad_norm": 0.6244030594825745, "learning_rate": 6.176837725381415e-06, "loss": 0.0328, "step": 5523 }, { "epoch": 3.826809837201247, "grad_norm": 0.5601057410240173, "learning_rate": 6.17614424410541e-06, "loss": 0.0297, "step": 5524 }, { "epoch": 3.827502597852442, "grad_norm": 0.6210319995880127, "learning_rate": 6.175450762829404e-06, "loss": 0.036, "step": 5525 }, { "epoch": 3.828195358503637, "grad_norm": 0.5982537865638733, "learning_rate": 6.174757281553399e-06, "loss": 0.0273, "step": 5526 }, { "epoch": 3.828888119154832, "grad_norm": 0.5864853858947754, "learning_rate": 6.174063800277393e-06, "loss": 0.025, "step": 5527 }, { "epoch": 3.829580879806027, "grad_norm": 0.5640624165534973, "learning_rate": 6.173370319001387e-06, "loss": 0.032, "step": 5528 }, { "epoch": 3.830273640457222, "grad_norm": 0.8922573924064636, "learning_rate": 6.172676837725382e-06, "loss": 0.0304, "step": 5529 }, { "epoch": 3.830966401108417, "grad_norm": 0.5165643692016602, "learning_rate": 6.171983356449376e-06, "loss": 0.0207, "step": 5530 }, { "epoch": 3.831659161759612, "grad_norm": 0.5760807991027832, "learning_rate": 6.171289875173371e-06, "loss": 0.0248, "step": 5531 }, { "epoch": 3.832351922410807, "grad_norm": 0.6061486005783081, "learning_rate": 6.170596393897366e-06, "loss": 0.0361, "step": 5532 }, { "epoch": 3.833044683062002, "grad_norm": 0.5424039363861084, "learning_rate": 6.169902912621359e-06, "loss": 0.0251, "step": 5533 }, { "epoch": 3.833737443713197, "grad_norm": 0.6765320897102356, "learning_rate": 6.169209431345354e-06, "loss": 0.0338, "step": 5534 }, { "epoch": 3.8344302043643923, "grad_norm": 0.6279667019844055, "learning_rate": 6.168515950069348e-06, "loss": 0.0308, "step": 5535 }, { "epoch": 3.835122965015587, "grad_norm": 0.5763036608695984, "learning_rate": 6.167822468793343e-06, "loss": 0.0279, "step": 5536 }, { "epoch": 3.835815725666782, "grad_norm": 0.5578659772872925, "learning_rate": 6.167128987517338e-06, "loss": 0.0281, "step": 5537 }, { "epoch": 3.8365084863179773, "grad_norm": 0.5194475054740906, "learning_rate": 6.166435506241331e-06, "loss": 0.0294, "step": 5538 }, { "epoch": 3.837201246969172, "grad_norm": 0.6886339783668518, "learning_rate": 6.165742024965326e-06, "loss": 0.032, "step": 5539 }, { "epoch": 3.837894007620367, "grad_norm": 0.8126854300498962, "learning_rate": 6.16504854368932e-06, "loss": 0.0259, "step": 5540 }, { "epoch": 3.838586768271562, "grad_norm": 0.6386008262634277, "learning_rate": 6.164355062413315e-06, "loss": 0.031, "step": 5541 }, { "epoch": 3.839279528922757, "grad_norm": 0.5712670683860779, "learning_rate": 6.16366158113731e-06, "loss": 0.0239, "step": 5542 }, { "epoch": 3.8399722895739523, "grad_norm": 0.5645532608032227, "learning_rate": 6.162968099861304e-06, "loss": 0.0222, "step": 5543 }, { "epoch": 3.840665050225147, "grad_norm": 0.7109053730964661, "learning_rate": 6.162274618585299e-06, "loss": 0.0337, "step": 5544 }, { "epoch": 3.8413578108763424, "grad_norm": 0.6322464942932129, "learning_rate": 6.1615811373092925e-06, "loss": 0.0391, "step": 5545 }, { "epoch": 3.8420505715275373, "grad_norm": 0.5483359694480896, "learning_rate": 6.160887656033287e-06, "loss": 0.0256, "step": 5546 }, { "epoch": 3.842743332178732, "grad_norm": 0.6358020305633545, "learning_rate": 6.160194174757282e-06, "loss": 0.0296, "step": 5547 }, { "epoch": 3.8434360928299274, "grad_norm": 0.7286901473999023, "learning_rate": 6.1595006934812764e-06, "loss": 0.0361, "step": 5548 }, { "epoch": 3.844128853481122, "grad_norm": 0.603725254535675, "learning_rate": 6.158807212205271e-06, "loss": 0.0387, "step": 5549 }, { "epoch": 3.844821614132317, "grad_norm": 0.5984010696411133, "learning_rate": 6.158113730929265e-06, "loss": 0.0358, "step": 5550 }, { "epoch": 3.8455143747835123, "grad_norm": 0.5777190327644348, "learning_rate": 6.1574202496532595e-06, "loss": 0.026, "step": 5551 }, { "epoch": 3.846207135434707, "grad_norm": 0.6447054743766785, "learning_rate": 6.1567267683772545e-06, "loss": 0.0343, "step": 5552 }, { "epoch": 3.8468998960859024, "grad_norm": 0.6139410138130188, "learning_rate": 6.1560332871012486e-06, "loss": 0.0285, "step": 5553 }, { "epoch": 3.8475926567370973, "grad_norm": 0.5288937091827393, "learning_rate": 6.1553398058252435e-06, "loss": 0.0235, "step": 5554 }, { "epoch": 3.8482854173882926, "grad_norm": 0.6244781613349915, "learning_rate": 6.1546463245492376e-06, "loss": 0.0272, "step": 5555 }, { "epoch": 3.8489781780394874, "grad_norm": 0.6281181573867798, "learning_rate": 6.153952843273232e-06, "loss": 0.0322, "step": 5556 }, { "epoch": 3.849670938690682, "grad_norm": 0.9273653626441956, "learning_rate": 6.153259361997227e-06, "loss": 0.0321, "step": 5557 }, { "epoch": 3.8503636993418775, "grad_norm": 0.5947158336639404, "learning_rate": 6.152565880721221e-06, "loss": 0.0349, "step": 5558 }, { "epoch": 3.8510564599930723, "grad_norm": 0.5656794905662537, "learning_rate": 6.151872399445216e-06, "loss": 0.0291, "step": 5559 }, { "epoch": 3.851749220644267, "grad_norm": 0.6337341070175171, "learning_rate": 6.15117891816921e-06, "loss": 0.0348, "step": 5560 }, { "epoch": 3.8524419812954624, "grad_norm": 0.6344237923622131, "learning_rate": 6.150485436893205e-06, "loss": 0.0389, "step": 5561 }, { "epoch": 3.8531347419466573, "grad_norm": 0.5519279837608337, "learning_rate": 6.1497919556171996e-06, "loss": 0.0321, "step": 5562 }, { "epoch": 3.8538275025978526, "grad_norm": 0.6443216800689697, "learning_rate": 6.149098474341193e-06, "loss": 0.0361, "step": 5563 }, { "epoch": 3.8545202632490474, "grad_norm": 0.6036908626556396, "learning_rate": 6.148404993065188e-06, "loss": 0.033, "step": 5564 }, { "epoch": 3.8552130239002427, "grad_norm": 0.7718948125839233, "learning_rate": 6.147711511789182e-06, "loss": 0.041, "step": 5565 }, { "epoch": 3.8559057845514375, "grad_norm": 0.6507557034492493, "learning_rate": 6.147018030513177e-06, "loss": 0.0376, "step": 5566 }, { "epoch": 3.8565985452026323, "grad_norm": 0.6405223608016968, "learning_rate": 6.146324549237172e-06, "loss": 0.0363, "step": 5567 }, { "epoch": 3.8572913058538276, "grad_norm": 0.6134037375450134, "learning_rate": 6.145631067961165e-06, "loss": 0.03, "step": 5568 }, { "epoch": 3.8579840665050225, "grad_norm": 0.7090088129043579, "learning_rate": 6.14493758668516e-06, "loss": 0.0369, "step": 5569 }, { "epoch": 3.8586768271562173, "grad_norm": 0.49887773394584656, "learning_rate": 6.144244105409154e-06, "loss": 0.0282, "step": 5570 }, { "epoch": 3.8593695878074126, "grad_norm": 0.5553210377693176, "learning_rate": 6.143550624133149e-06, "loss": 0.0249, "step": 5571 }, { "epoch": 3.8600623484586074, "grad_norm": 0.6809664964675903, "learning_rate": 6.142857142857144e-06, "loss": 0.0339, "step": 5572 }, { "epoch": 3.8607551091098027, "grad_norm": 1.0848239660263062, "learning_rate": 6.142163661581138e-06, "loss": 0.0411, "step": 5573 }, { "epoch": 3.8614478697609975, "grad_norm": 0.6416944265365601, "learning_rate": 6.141470180305133e-06, "loss": 0.0316, "step": 5574 }, { "epoch": 3.862140630412193, "grad_norm": 0.5461816787719727, "learning_rate": 6.140776699029126e-06, "loss": 0.0297, "step": 5575 }, { "epoch": 3.8628333910633876, "grad_norm": 0.5792284607887268, "learning_rate": 6.140083217753121e-06, "loss": 0.038, "step": 5576 }, { "epoch": 3.8635261517145825, "grad_norm": 0.6209005117416382, "learning_rate": 6.139389736477116e-06, "loss": 0.0453, "step": 5577 }, { "epoch": 3.8642189123657777, "grad_norm": 0.5361753702163696, "learning_rate": 6.13869625520111e-06, "loss": 0.0219, "step": 5578 }, { "epoch": 3.8649116730169726, "grad_norm": 0.5698778629302979, "learning_rate": 6.138002773925105e-06, "loss": 0.0281, "step": 5579 }, { "epoch": 3.8656044336681674, "grad_norm": 0.5724604725837708, "learning_rate": 6.137309292649098e-06, "loss": 0.0327, "step": 5580 }, { "epoch": 3.8662971943193627, "grad_norm": 0.6027019619941711, "learning_rate": 6.136615811373093e-06, "loss": 0.0302, "step": 5581 }, { "epoch": 3.8669899549705575, "grad_norm": 0.6003559827804565, "learning_rate": 6.135922330097088e-06, "loss": 0.0274, "step": 5582 }, { "epoch": 3.867682715621753, "grad_norm": 0.5582703948020935, "learning_rate": 6.135228848821082e-06, "loss": 0.0216, "step": 5583 }, { "epoch": 3.8683754762729476, "grad_norm": 0.5991309881210327, "learning_rate": 6.134535367545077e-06, "loss": 0.0356, "step": 5584 }, { "epoch": 3.869068236924143, "grad_norm": 0.6197720766067505, "learning_rate": 6.13384188626907e-06, "loss": 0.034, "step": 5585 }, { "epoch": 3.8697609975753378, "grad_norm": 0.5650931000709534, "learning_rate": 6.133148404993065e-06, "loss": 0.0314, "step": 5586 }, { "epoch": 3.8704537582265326, "grad_norm": 0.6536426544189453, "learning_rate": 6.13245492371706e-06, "loss": 0.0334, "step": 5587 }, { "epoch": 3.871146518877728, "grad_norm": 0.5091869831085205, "learning_rate": 6.131761442441054e-06, "loss": 0.0251, "step": 5588 }, { "epoch": 3.8718392795289227, "grad_norm": 0.6284472942352295, "learning_rate": 6.131067961165049e-06, "loss": 0.0304, "step": 5589 }, { "epoch": 3.8725320401801175, "grad_norm": 0.660051167011261, "learning_rate": 6.130374479889043e-06, "loss": 0.0312, "step": 5590 }, { "epoch": 3.873224800831313, "grad_norm": 0.6200466752052307, "learning_rate": 6.129680998613038e-06, "loss": 0.0322, "step": 5591 }, { "epoch": 3.8739175614825077, "grad_norm": 0.587002158164978, "learning_rate": 6.128987517337033e-06, "loss": 0.0382, "step": 5592 }, { "epoch": 3.874610322133703, "grad_norm": 0.622517466545105, "learning_rate": 6.128294036061026e-06, "loss": 0.0258, "step": 5593 }, { "epoch": 3.8753030827848978, "grad_norm": 0.6990046501159668, "learning_rate": 6.127600554785021e-06, "loss": 0.0259, "step": 5594 }, { "epoch": 3.875995843436093, "grad_norm": 0.4947289824485779, "learning_rate": 6.1269070735090154e-06, "loss": 0.0234, "step": 5595 }, { "epoch": 3.876688604087288, "grad_norm": 0.6375725865364075, "learning_rate": 6.12621359223301e-06, "loss": 0.0319, "step": 5596 }, { "epoch": 3.8773813647384827, "grad_norm": 0.6191786527633667, "learning_rate": 6.125520110957005e-06, "loss": 0.0389, "step": 5597 }, { "epoch": 3.878074125389678, "grad_norm": 0.5996862649917603, "learning_rate": 6.1248266296809985e-06, "loss": 0.0288, "step": 5598 }, { "epoch": 3.878766886040873, "grad_norm": 0.6783562898635864, "learning_rate": 6.1241331484049935e-06, "loss": 0.0346, "step": 5599 }, { "epoch": 3.8794596466920677, "grad_norm": 0.6701961755752563, "learning_rate": 6.1234396671289876e-06, "loss": 0.0368, "step": 5600 }, { "epoch": 3.880152407343263, "grad_norm": 0.55217444896698, "learning_rate": 6.1227461858529825e-06, "loss": 0.0272, "step": 5601 }, { "epoch": 3.880845167994458, "grad_norm": 0.5103873610496521, "learning_rate": 6.122052704576977e-06, "loss": 0.0248, "step": 5602 }, { "epoch": 3.881537928645653, "grad_norm": 0.7307687401771545, "learning_rate": 6.1213592233009715e-06, "loss": 0.0315, "step": 5603 }, { "epoch": 3.882230689296848, "grad_norm": 0.5511279106140137, "learning_rate": 6.120665742024966e-06, "loss": 0.0389, "step": 5604 }, { "epoch": 3.882923449948043, "grad_norm": 0.5821829438209534, "learning_rate": 6.11997226074896e-06, "loss": 0.0252, "step": 5605 }, { "epoch": 3.883616210599238, "grad_norm": 0.689548134803772, "learning_rate": 6.119278779472955e-06, "loss": 0.0344, "step": 5606 }, { "epoch": 3.884308971250433, "grad_norm": 0.6860352158546448, "learning_rate": 6.1185852981969495e-06, "loss": 0.0332, "step": 5607 }, { "epoch": 3.885001731901628, "grad_norm": 0.652440071105957, "learning_rate": 6.117891816920944e-06, "loss": 0.0286, "step": 5608 }, { "epoch": 3.885694492552823, "grad_norm": 0.6276047825813293, "learning_rate": 6.1171983356449386e-06, "loss": 0.0315, "step": 5609 }, { "epoch": 3.886387253204018, "grad_norm": 0.8977293968200684, "learning_rate": 6.116504854368932e-06, "loss": 0.0303, "step": 5610 }, { "epoch": 3.887080013855213, "grad_norm": 0.6617765426635742, "learning_rate": 6.115811373092927e-06, "loss": 0.0344, "step": 5611 }, { "epoch": 3.887772774506408, "grad_norm": 0.6797682642936707, "learning_rate": 6.115117891816922e-06, "loss": 0.0377, "step": 5612 }, { "epoch": 3.888465535157603, "grad_norm": 0.5454661250114441, "learning_rate": 6.114424410540916e-06, "loss": 0.0304, "step": 5613 }, { "epoch": 3.889158295808798, "grad_norm": 0.6362570524215698, "learning_rate": 6.113730929264911e-06, "loss": 0.0283, "step": 5614 }, { "epoch": 3.8898510564599933, "grad_norm": 0.6150993704795837, "learning_rate": 6.113037447988904e-06, "loss": 0.0342, "step": 5615 }, { "epoch": 3.890543817111188, "grad_norm": 0.4676876366138458, "learning_rate": 6.112343966712899e-06, "loss": 0.0208, "step": 5616 }, { "epoch": 3.891236577762383, "grad_norm": 0.6185954213142395, "learning_rate": 6.111650485436894e-06, "loss": 0.0335, "step": 5617 }, { "epoch": 3.8919293384135782, "grad_norm": 0.7884261608123779, "learning_rate": 6.110957004160888e-06, "loss": 0.0414, "step": 5618 }, { "epoch": 3.892622099064773, "grad_norm": 0.6566745042800903, "learning_rate": 6.110263522884883e-06, "loss": 0.0309, "step": 5619 }, { "epoch": 3.893314859715968, "grad_norm": 0.5411038994789124, "learning_rate": 6.109570041608877e-06, "loss": 0.0267, "step": 5620 }, { "epoch": 3.894007620367163, "grad_norm": 0.5566913485527039, "learning_rate": 6.108876560332872e-06, "loss": 0.031, "step": 5621 }, { "epoch": 3.894700381018358, "grad_norm": 0.49798959493637085, "learning_rate": 6.108183079056867e-06, "loss": 0.0319, "step": 5622 }, { "epoch": 3.8953931416695533, "grad_norm": 0.6349419951438904, "learning_rate": 6.10748959778086e-06, "loss": 0.0277, "step": 5623 }, { "epoch": 3.896085902320748, "grad_norm": 0.6099962592124939, "learning_rate": 6.106796116504855e-06, "loss": 0.0303, "step": 5624 }, { "epoch": 3.8967786629719434, "grad_norm": 0.6855891346931458, "learning_rate": 6.106102635228849e-06, "loss": 0.041, "step": 5625 }, { "epoch": 3.8974714236231383, "grad_norm": 0.6275585293769836, "learning_rate": 6.105409153952844e-06, "loss": 0.0332, "step": 5626 }, { "epoch": 3.898164184274333, "grad_norm": 0.5623182654380798, "learning_rate": 6.104715672676839e-06, "loss": 0.0284, "step": 5627 }, { "epoch": 3.8988569449255284, "grad_norm": 0.5225732922554016, "learning_rate": 6.104022191400832e-06, "loss": 0.0332, "step": 5628 }, { "epoch": 3.899549705576723, "grad_norm": 0.7272273302078247, "learning_rate": 6.103328710124827e-06, "loss": 0.0357, "step": 5629 }, { "epoch": 3.900242466227918, "grad_norm": 0.571682870388031, "learning_rate": 6.102635228848821e-06, "loss": 0.0286, "step": 5630 }, { "epoch": 3.9009352268791133, "grad_norm": 0.5897750854492188, "learning_rate": 6.101941747572816e-06, "loss": 0.0313, "step": 5631 }, { "epoch": 3.901627987530308, "grad_norm": 0.839536726474762, "learning_rate": 6.101248266296811e-06, "loss": 0.0341, "step": 5632 }, { "epoch": 3.9023207481815034, "grad_norm": 0.780576765537262, "learning_rate": 6.100554785020804e-06, "loss": 0.0318, "step": 5633 }, { "epoch": 3.9030135088326983, "grad_norm": 0.6373487710952759, "learning_rate": 6.099861303744799e-06, "loss": 0.027, "step": 5634 }, { "epoch": 3.9037062694838935, "grad_norm": 0.584993839263916, "learning_rate": 6.099167822468793e-06, "loss": 0.0344, "step": 5635 }, { "epoch": 3.9043990301350884, "grad_norm": 0.6027787327766418, "learning_rate": 6.098474341192788e-06, "loss": 0.0319, "step": 5636 }, { "epoch": 3.905091790786283, "grad_norm": 0.7014255523681641, "learning_rate": 6.097780859916783e-06, "loss": 0.0351, "step": 5637 }, { "epoch": 3.9057845514374785, "grad_norm": 0.5559835433959961, "learning_rate": 6.097087378640777e-06, "loss": 0.0258, "step": 5638 }, { "epoch": 3.9064773120886733, "grad_norm": 0.6171395778656006, "learning_rate": 6.096393897364772e-06, "loss": 0.0357, "step": 5639 }, { "epoch": 3.907170072739868, "grad_norm": 0.6097577810287476, "learning_rate": 6.095700416088765e-06, "loss": 0.0272, "step": 5640 }, { "epoch": 3.9078628333910634, "grad_norm": 0.7907153367996216, "learning_rate": 6.09500693481276e-06, "loss": 0.0379, "step": 5641 }, { "epoch": 3.9085555940422583, "grad_norm": 0.5487450361251831, "learning_rate": 6.094313453536755e-06, "loss": 0.0334, "step": 5642 }, { "epoch": 3.9092483546934536, "grad_norm": 0.638064980506897, "learning_rate": 6.093619972260749e-06, "loss": 0.0355, "step": 5643 }, { "epoch": 3.9099411153446484, "grad_norm": 0.5636724233627319, "learning_rate": 6.092926490984744e-06, "loss": 0.0203, "step": 5644 }, { "epoch": 3.9106338759958437, "grad_norm": 0.6418699026107788, "learning_rate": 6.0922330097087375e-06, "loss": 0.0257, "step": 5645 }, { "epoch": 3.9113266366470385, "grad_norm": 0.6871115565299988, "learning_rate": 6.0915395284327325e-06, "loss": 0.034, "step": 5646 }, { "epoch": 3.9120193972982333, "grad_norm": 0.5053454041481018, "learning_rate": 6.090846047156727e-06, "loss": 0.0275, "step": 5647 }, { "epoch": 3.9127121579494286, "grad_norm": 0.5848159193992615, "learning_rate": 6.0901525658807215e-06, "loss": 0.0295, "step": 5648 }, { "epoch": 3.9134049186006234, "grad_norm": 0.7460579872131348, "learning_rate": 6.089459084604716e-06, "loss": 0.0449, "step": 5649 }, { "epoch": 3.9140976792518183, "grad_norm": 0.5983049273490906, "learning_rate": 6.0887656033287105e-06, "loss": 0.0366, "step": 5650 }, { "epoch": 3.9147904399030136, "grad_norm": 0.6734099984169006, "learning_rate": 6.0880721220527054e-06, "loss": 0.0285, "step": 5651 }, { "epoch": 3.9154832005542084, "grad_norm": 0.6277661323547363, "learning_rate": 6.0873786407766995e-06, "loss": 0.0344, "step": 5652 }, { "epoch": 3.9161759612054037, "grad_norm": 0.6390184164047241, "learning_rate": 6.086685159500694e-06, "loss": 0.0317, "step": 5653 }, { "epoch": 3.9168687218565985, "grad_norm": 0.6505480408668518, "learning_rate": 6.0859916782246885e-06, "loss": 0.0344, "step": 5654 }, { "epoch": 3.917561482507794, "grad_norm": 0.5460807085037231, "learning_rate": 6.085298196948683e-06, "loss": 0.0275, "step": 5655 }, { "epoch": 3.9182542431589886, "grad_norm": 0.5947510004043579, "learning_rate": 6.0846047156726776e-06, "loss": 0.0348, "step": 5656 }, { "epoch": 3.9189470038101835, "grad_norm": 0.8682061433792114, "learning_rate": 6.0839112343966725e-06, "loss": 0.0359, "step": 5657 }, { "epoch": 3.9196397644613787, "grad_norm": 0.5251128077507019, "learning_rate": 6.083217753120666e-06, "loss": 0.026, "step": 5658 }, { "epoch": 3.9203325251125736, "grad_norm": 0.7046948671340942, "learning_rate": 6.082524271844661e-06, "loss": 0.0352, "step": 5659 }, { "epoch": 3.9210252857637684, "grad_norm": 0.5649242997169495, "learning_rate": 6.081830790568655e-06, "loss": 0.0302, "step": 5660 }, { "epoch": 3.9217180464149637, "grad_norm": 0.6032993197441101, "learning_rate": 6.08113730929265e-06, "loss": 0.0334, "step": 5661 }, { "epoch": 3.9224108070661585, "grad_norm": 0.5204776525497437, "learning_rate": 6.080443828016645e-06, "loss": 0.0209, "step": 5662 }, { "epoch": 3.923103567717354, "grad_norm": 0.7001051902770996, "learning_rate": 6.079750346740638e-06, "loss": 0.0299, "step": 5663 }, { "epoch": 3.9237963283685486, "grad_norm": 0.6807113885879517, "learning_rate": 6.079056865464633e-06, "loss": 0.025, "step": 5664 }, { "epoch": 3.924489089019744, "grad_norm": 0.6066899299621582, "learning_rate": 6.078363384188627e-06, "loss": 0.0269, "step": 5665 }, { "epoch": 3.9251818496709387, "grad_norm": 0.843798816204071, "learning_rate": 6.077669902912622e-06, "loss": 0.0266, "step": 5666 }, { "epoch": 3.9258746103221336, "grad_norm": 0.6966928243637085, "learning_rate": 6.076976421636617e-06, "loss": 0.0336, "step": 5667 }, { "epoch": 3.926567370973329, "grad_norm": 0.6036196351051331, "learning_rate": 6.076282940360611e-06, "loss": 0.0247, "step": 5668 }, { "epoch": 3.9272601316245237, "grad_norm": 0.6237078309059143, "learning_rate": 6.075589459084606e-06, "loss": 0.0276, "step": 5669 }, { "epoch": 3.9279528922757185, "grad_norm": 0.6260361671447754, "learning_rate": 6.074895977808599e-06, "loss": 0.0231, "step": 5670 }, { "epoch": 3.928645652926914, "grad_norm": 0.7189197540283203, "learning_rate": 6.074202496532594e-06, "loss": 0.0404, "step": 5671 }, { "epoch": 3.9293384135781086, "grad_norm": 0.585728645324707, "learning_rate": 6.073509015256589e-06, "loss": 0.0312, "step": 5672 }, { "epoch": 3.930031174229304, "grad_norm": 0.5816975831985474, "learning_rate": 6.072815533980583e-06, "loss": 0.0254, "step": 5673 }, { "epoch": 3.9307239348804988, "grad_norm": 0.5944647192955017, "learning_rate": 6.072122052704578e-06, "loss": 0.028, "step": 5674 }, { "epoch": 3.931416695531694, "grad_norm": 0.5816596150398254, "learning_rate": 6.071428571428571e-06, "loss": 0.0274, "step": 5675 }, { "epoch": 3.932109456182889, "grad_norm": 0.6381893157958984, "learning_rate": 6.070735090152566e-06, "loss": 0.0284, "step": 5676 }, { "epoch": 3.9328022168340837, "grad_norm": 0.643214762210846, "learning_rate": 6.070041608876561e-06, "loss": 0.033, "step": 5677 }, { "epoch": 3.933494977485279, "grad_norm": 0.5460039377212524, "learning_rate": 6.069348127600555e-06, "loss": 0.0303, "step": 5678 }, { "epoch": 3.934187738136474, "grad_norm": 0.6501333117485046, "learning_rate": 6.06865464632455e-06, "loss": 0.032, "step": 5679 }, { "epoch": 3.9348804987876687, "grad_norm": 0.7149091362953186, "learning_rate": 6.067961165048544e-06, "loss": 0.0412, "step": 5680 }, { "epoch": 3.935573259438864, "grad_norm": 0.6299799680709839, "learning_rate": 6.067267683772538e-06, "loss": 0.0367, "step": 5681 }, { "epoch": 3.9362660200900588, "grad_norm": 0.6541127562522888, "learning_rate": 6.066574202496533e-06, "loss": 0.033, "step": 5682 }, { "epoch": 3.936958780741254, "grad_norm": 0.8621496558189392, "learning_rate": 6.065880721220527e-06, "loss": 0.0462, "step": 5683 }, { "epoch": 3.937651541392449, "grad_norm": 0.6117715835571289, "learning_rate": 6.065187239944522e-06, "loss": 0.0273, "step": 5684 }, { "epoch": 3.938344302043644, "grad_norm": 0.6450139284133911, "learning_rate": 6.064493758668516e-06, "loss": 0.035, "step": 5685 }, { "epoch": 3.939037062694839, "grad_norm": 0.686332643032074, "learning_rate": 6.063800277392511e-06, "loss": 0.0334, "step": 5686 }, { "epoch": 3.939729823346034, "grad_norm": 0.7327834963798523, "learning_rate": 6.063106796116506e-06, "loss": 0.0314, "step": 5687 }, { "epoch": 3.940422583997229, "grad_norm": 0.5229851007461548, "learning_rate": 6.062413314840499e-06, "loss": 0.0308, "step": 5688 }, { "epoch": 3.941115344648424, "grad_norm": 0.5099225044250488, "learning_rate": 6.061719833564494e-06, "loss": 0.0271, "step": 5689 }, { "epoch": 3.941808105299619, "grad_norm": 0.6219456195831299, "learning_rate": 6.061026352288488e-06, "loss": 0.0411, "step": 5690 }, { "epoch": 3.942500865950814, "grad_norm": 0.6136319637298584, "learning_rate": 6.060332871012483e-06, "loss": 0.0283, "step": 5691 }, { "epoch": 3.943193626602009, "grad_norm": 0.7287399172782898, "learning_rate": 6.059639389736478e-06, "loss": 0.0325, "step": 5692 }, { "epoch": 3.943886387253204, "grad_norm": 0.6488358378410339, "learning_rate": 6.0589459084604715e-06, "loss": 0.0352, "step": 5693 }, { "epoch": 3.944579147904399, "grad_norm": 0.5832087993621826, "learning_rate": 6.058252427184466e-06, "loss": 0.0335, "step": 5694 }, { "epoch": 3.9452719085555943, "grad_norm": 0.6404885053634644, "learning_rate": 6.0575589459084605e-06, "loss": 0.0391, "step": 5695 }, { "epoch": 3.945964669206789, "grad_norm": 0.4916565418243408, "learning_rate": 6.056865464632455e-06, "loss": 0.023, "step": 5696 }, { "epoch": 3.946657429857984, "grad_norm": 0.9438236355781555, "learning_rate": 6.05617198335645e-06, "loss": 0.0406, "step": 5697 }, { "epoch": 3.9473501905091792, "grad_norm": 0.5329188704490662, "learning_rate": 6.0554785020804444e-06, "loss": 0.0197, "step": 5698 }, { "epoch": 3.948042951160374, "grad_norm": 0.6993545889854431, "learning_rate": 6.054785020804439e-06, "loss": 0.0333, "step": 5699 }, { "epoch": 3.948735711811569, "grad_norm": 0.5379102826118469, "learning_rate": 6.054091539528433e-06, "loss": 0.0275, "step": 5700 }, { "epoch": 3.949428472462764, "grad_norm": 0.6155920028686523, "learning_rate": 6.0533980582524275e-06, "loss": 0.039, "step": 5701 }, { "epoch": 3.950121233113959, "grad_norm": 0.6053874492645264, "learning_rate": 6.0527045769764225e-06, "loss": 0.0257, "step": 5702 }, { "epoch": 3.9508139937651543, "grad_norm": 0.5987370610237122, "learning_rate": 6.0520110957004166e-06, "loss": 0.0425, "step": 5703 }, { "epoch": 3.951506754416349, "grad_norm": 0.5557329654693604, "learning_rate": 6.0513176144244115e-06, "loss": 0.0315, "step": 5704 }, { "epoch": 3.9521995150675444, "grad_norm": 0.5767523646354675, "learning_rate": 6.050624133148405e-06, "loss": 0.033, "step": 5705 }, { "epoch": 3.9528922757187392, "grad_norm": 0.7918155789375305, "learning_rate": 6.0499306518724e-06, "loss": 0.0368, "step": 5706 }, { "epoch": 3.953585036369934, "grad_norm": 0.7695801258087158, "learning_rate": 6.049237170596395e-06, "loss": 0.0321, "step": 5707 }, { "epoch": 3.9542777970211294, "grad_norm": 0.8224954009056091, "learning_rate": 6.048543689320389e-06, "loss": 0.0368, "step": 5708 }, { "epoch": 3.954970557672324, "grad_norm": 0.593601405620575, "learning_rate": 6.047850208044384e-06, "loss": 0.0353, "step": 5709 }, { "epoch": 3.955663318323519, "grad_norm": 0.74740070104599, "learning_rate": 6.047156726768377e-06, "loss": 0.0408, "step": 5710 }, { "epoch": 3.9563560789747143, "grad_norm": 0.5545331239700317, "learning_rate": 6.046463245492372e-06, "loss": 0.0298, "step": 5711 }, { "epoch": 3.957048839625909, "grad_norm": 0.6210556626319885, "learning_rate": 6.045769764216367e-06, "loss": 0.0269, "step": 5712 }, { "epoch": 3.9577416002771044, "grad_norm": 0.6635833978652954, "learning_rate": 6.045076282940361e-06, "loss": 0.0398, "step": 5713 }, { "epoch": 3.9584343609282993, "grad_norm": 0.6100125312805176, "learning_rate": 6.044382801664356e-06, "loss": 0.0277, "step": 5714 }, { "epoch": 3.9591271215794945, "grad_norm": 0.5405066013336182, "learning_rate": 6.04368932038835e-06, "loss": 0.0279, "step": 5715 }, { "epoch": 3.9598198822306894, "grad_norm": 0.578748881816864, "learning_rate": 6.042995839112345e-06, "loss": 0.0274, "step": 5716 }, { "epoch": 3.960512642881884, "grad_norm": 0.5417850017547607, "learning_rate": 6.04230235783634e-06, "loss": 0.0238, "step": 5717 }, { "epoch": 3.9612054035330795, "grad_norm": 0.7624323964118958, "learning_rate": 6.041608876560333e-06, "loss": 0.0381, "step": 5718 }, { "epoch": 3.9618981641842743, "grad_norm": 0.7578420042991638, "learning_rate": 6.040915395284328e-06, "loss": 0.0424, "step": 5719 }, { "epoch": 3.962590924835469, "grad_norm": 0.6090701818466187, "learning_rate": 6.040221914008322e-06, "loss": 0.0318, "step": 5720 }, { "epoch": 3.9632836854866644, "grad_norm": 0.5844273567199707, "learning_rate": 6.039528432732317e-06, "loss": 0.0302, "step": 5721 }, { "epoch": 3.9639764461378593, "grad_norm": 0.5725448131561279, "learning_rate": 6.038834951456312e-06, "loss": 0.0262, "step": 5722 }, { "epoch": 3.9646692067890545, "grad_norm": 0.617495596408844, "learning_rate": 6.038141470180305e-06, "loss": 0.0311, "step": 5723 }, { "epoch": 3.9653619674402494, "grad_norm": 0.5846660733222961, "learning_rate": 6.0374479889043e-06, "loss": 0.0287, "step": 5724 }, { "epoch": 3.9660547280914447, "grad_norm": 0.5877677202224731, "learning_rate": 6.036754507628294e-06, "loss": 0.0356, "step": 5725 }, { "epoch": 3.9667474887426395, "grad_norm": 0.5529595613479614, "learning_rate": 6.036061026352289e-06, "loss": 0.0244, "step": 5726 }, { "epoch": 3.9674402493938343, "grad_norm": 0.7487414479255676, "learning_rate": 6.035367545076284e-06, "loss": 0.0382, "step": 5727 }, { "epoch": 3.9681330100450296, "grad_norm": 0.5108417868614197, "learning_rate": 6.034674063800278e-06, "loss": 0.0224, "step": 5728 }, { "epoch": 3.9688257706962244, "grad_norm": 0.5159150958061218, "learning_rate": 6.033980582524272e-06, "loss": 0.0208, "step": 5729 }, { "epoch": 3.9695185313474193, "grad_norm": 0.6342235803604126, "learning_rate": 6.033287101248266e-06, "loss": 0.0402, "step": 5730 }, { "epoch": 3.9702112919986146, "grad_norm": 0.6006144881248474, "learning_rate": 6.032593619972261e-06, "loss": 0.0293, "step": 5731 }, { "epoch": 3.9709040526498094, "grad_norm": 0.5523648858070374, "learning_rate": 6.031900138696256e-06, "loss": 0.0206, "step": 5732 }, { "epoch": 3.9715968133010047, "grad_norm": 0.5949568748474121, "learning_rate": 6.03120665742025e-06, "loss": 0.0271, "step": 5733 }, { "epoch": 3.9722895739521995, "grad_norm": 0.5630077123641968, "learning_rate": 6.030513176144245e-06, "loss": 0.0252, "step": 5734 }, { "epoch": 3.972982334603395, "grad_norm": 0.5368571281433105, "learning_rate": 6.029819694868238e-06, "loss": 0.0248, "step": 5735 }, { "epoch": 3.9736750952545896, "grad_norm": 0.6003128886222839, "learning_rate": 6.029126213592233e-06, "loss": 0.0242, "step": 5736 }, { "epoch": 3.9743678559057845, "grad_norm": 0.5548897981643677, "learning_rate": 6.028432732316228e-06, "loss": 0.0258, "step": 5737 }, { "epoch": 3.9750606165569797, "grad_norm": 0.5407117605209351, "learning_rate": 6.027739251040222e-06, "loss": 0.0242, "step": 5738 }, { "epoch": 3.9757533772081746, "grad_norm": 0.5861427783966064, "learning_rate": 6.027045769764217e-06, "loss": 0.0276, "step": 5739 }, { "epoch": 3.9764461378593694, "grad_norm": 0.6128274202346802, "learning_rate": 6.0263522884882105e-06, "loss": 0.0321, "step": 5740 }, { "epoch": 3.9771388985105647, "grad_norm": 0.7110495567321777, "learning_rate": 6.025658807212205e-06, "loss": 0.0382, "step": 5741 }, { "epoch": 3.9778316591617595, "grad_norm": 0.634889543056488, "learning_rate": 6.0249653259362e-06, "loss": 0.0311, "step": 5742 }, { "epoch": 3.9785244198129543, "grad_norm": 0.5484490990638733, "learning_rate": 6.024271844660194e-06, "loss": 0.0258, "step": 5743 }, { "epoch": 3.9792171804641496, "grad_norm": 0.6471605896949768, "learning_rate": 6.023578363384189e-06, "loss": 0.031, "step": 5744 }, { "epoch": 3.979909941115345, "grad_norm": 0.6700403094291687, "learning_rate": 6.0228848821081834e-06, "loss": 0.0423, "step": 5745 }, { "epoch": 3.9806027017665397, "grad_norm": 0.7532503008842468, "learning_rate": 6.022191400832178e-06, "loss": 0.029, "step": 5746 }, { "epoch": 3.9812954624177346, "grad_norm": 0.49918434023857117, "learning_rate": 6.021497919556173e-06, "loss": 0.0241, "step": 5747 }, { "epoch": 3.98198822306893, "grad_norm": 0.4753774404525757, "learning_rate": 6.0208044382801665e-06, "loss": 0.0238, "step": 5748 }, { "epoch": 3.9826809837201247, "grad_norm": 0.6657820343971252, "learning_rate": 6.0201109570041615e-06, "loss": 0.0301, "step": 5749 }, { "epoch": 3.9833737443713195, "grad_norm": 0.6276349425315857, "learning_rate": 6.0194174757281556e-06, "loss": 0.0283, "step": 5750 }, { "epoch": 3.984066505022515, "grad_norm": 0.570073127746582, "learning_rate": 6.0187239944521505e-06, "loss": 0.0251, "step": 5751 }, { "epoch": 3.9847592656737096, "grad_norm": 0.7095314860343933, "learning_rate": 6.0180305131761454e-06, "loss": 0.0321, "step": 5752 }, { "epoch": 3.9854520263249045, "grad_norm": 0.6330137848854065, "learning_rate": 6.017337031900139e-06, "loss": 0.024, "step": 5753 }, { "epoch": 3.9861447869760998, "grad_norm": 0.82314133644104, "learning_rate": 6.016643550624134e-06, "loss": 0.0357, "step": 5754 }, { "epoch": 3.986837547627295, "grad_norm": 0.6464589834213257, "learning_rate": 6.015950069348128e-06, "loss": 0.0337, "step": 5755 }, { "epoch": 3.98753030827849, "grad_norm": 0.626295268535614, "learning_rate": 6.015256588072123e-06, "loss": 0.0375, "step": 5756 }, { "epoch": 3.9882230689296847, "grad_norm": 0.6050504446029663, "learning_rate": 6.0145631067961176e-06, "loss": 0.0233, "step": 5757 }, { "epoch": 3.98891582958088, "grad_norm": 0.7396339774131775, "learning_rate": 6.013869625520111e-06, "loss": 0.0584, "step": 5758 }, { "epoch": 3.989608590232075, "grad_norm": 0.6037498116493225, "learning_rate": 6.013176144244106e-06, "loss": 0.0354, "step": 5759 }, { "epoch": 3.9903013508832696, "grad_norm": 0.5753601789474487, "learning_rate": 6.0124826629681e-06, "loss": 0.0278, "step": 5760 }, { "epoch": 3.990994111534465, "grad_norm": 0.6056987643241882, "learning_rate": 6.011789181692095e-06, "loss": 0.0333, "step": 5761 }, { "epoch": 3.9916868721856598, "grad_norm": 0.6861843466758728, "learning_rate": 6.01109570041609e-06, "loss": 0.041, "step": 5762 }, { "epoch": 3.9923796328368546, "grad_norm": 0.6862473487854004, "learning_rate": 6.010402219140084e-06, "loss": 0.0395, "step": 5763 }, { "epoch": 3.99307239348805, "grad_norm": 0.5396139621734619, "learning_rate": 6.009708737864079e-06, "loss": 0.028, "step": 5764 }, { "epoch": 3.993765154139245, "grad_norm": 0.6474992632865906, "learning_rate": 6.009015256588072e-06, "loss": 0.0335, "step": 5765 }, { "epoch": 3.99445791479044, "grad_norm": 0.5092523694038391, "learning_rate": 6.008321775312067e-06, "loss": 0.0252, "step": 5766 }, { "epoch": 3.995150675441635, "grad_norm": 0.6200748682022095, "learning_rate": 6.007628294036062e-06, "loss": 0.0296, "step": 5767 }, { "epoch": 3.99584343609283, "grad_norm": 0.6921845078468323, "learning_rate": 6.006934812760056e-06, "loss": 0.0336, "step": 5768 }, { "epoch": 3.996536196744025, "grad_norm": 0.5785850286483765, "learning_rate": 6.006241331484051e-06, "loss": 0.0332, "step": 5769 }, { "epoch": 3.9972289573952198, "grad_norm": 0.5929657816886902, "learning_rate": 6.005547850208044e-06, "loss": 0.031, "step": 5770 }, { "epoch": 3.997921718046415, "grad_norm": 0.6023411750793457, "learning_rate": 6.004854368932039e-06, "loss": 0.0233, "step": 5771 }, { "epoch": 3.99861447869761, "grad_norm": 0.5986213684082031, "learning_rate": 6.004160887656034e-06, "loss": 0.0284, "step": 5772 }, { "epoch": 3.9993072393488047, "grad_norm": 0.5923529863357544, "learning_rate": 6.003467406380028e-06, "loss": 0.0292, "step": 5773 }, { "epoch": 4.0, "grad_norm": 0.5920475721359253, "learning_rate": 6.002773925104023e-06, "loss": 0.0331, "step": 5774 }, { "epoch": 4.0, "eval_loss": 0.2548733055591583, "eval_runtime": 7633.7469, "eval_samples_per_second": 1.048, "eval_steps_per_second": 0.033, "eval_wer": 12.593032600719804, "step": 5774 }, { "epoch": 4.000692760651195, "grad_norm": 0.44014424085617065, "learning_rate": 6.002080443828017e-06, "loss": 0.0137, "step": 5775 }, { "epoch": 4.00138552130239, "grad_norm": 0.39017313718795776, "learning_rate": 6.001386962552012e-06, "loss": 0.0152, "step": 5776 }, { "epoch": 4.002078281953585, "grad_norm": 0.4326973557472229, "learning_rate": 6.000693481276006e-06, "loss": 0.0173, "step": 5777 }, { "epoch": 4.00277104260478, "grad_norm": 0.36008891463279724, "learning_rate": 6e-06, "loss": 0.0132, "step": 5778 }, { "epoch": 4.003463803255975, "grad_norm": 0.3663184344768524, "learning_rate": 5.999306518723995e-06, "loss": 0.016, "step": 5779 }, { "epoch": 4.00415656390717, "grad_norm": 0.6453715562820435, "learning_rate": 5.998613037447989e-06, "loss": 0.0268, "step": 5780 }, { "epoch": 4.004849324558365, "grad_norm": 0.38540932536125183, "learning_rate": 5.997919556171984e-06, "loss": 0.0135, "step": 5781 }, { "epoch": 4.0055420852095605, "grad_norm": 0.45824089646339417, "learning_rate": 5.997226074895979e-06, "loss": 0.0163, "step": 5782 }, { "epoch": 4.006234845860755, "grad_norm": 0.3920636475086212, "learning_rate": 5.996532593619972e-06, "loss": 0.0166, "step": 5783 }, { "epoch": 4.00692760651195, "grad_norm": 0.5281590223312378, "learning_rate": 5.995839112343967e-06, "loss": 0.0178, "step": 5784 }, { "epoch": 4.007620367163145, "grad_norm": 0.4874959886074066, "learning_rate": 5.995145631067961e-06, "loss": 0.0149, "step": 5785 }, { "epoch": 4.00831312781434, "grad_norm": 0.46124112606048584, "learning_rate": 5.994452149791956e-06, "loss": 0.019, "step": 5786 }, { "epoch": 4.009005888465535, "grad_norm": 0.5415635108947754, "learning_rate": 5.993758668515951e-06, "loss": 0.0197, "step": 5787 }, { "epoch": 4.00969864911673, "grad_norm": 0.4696653187274933, "learning_rate": 5.993065187239944e-06, "loss": 0.0208, "step": 5788 }, { "epoch": 4.010391409767925, "grad_norm": 0.43915730714797974, "learning_rate": 5.992371705963939e-06, "loss": 0.0139, "step": 5789 }, { "epoch": 4.01108417041912, "grad_norm": 0.5245053768157959, "learning_rate": 5.9916782246879334e-06, "loss": 0.0188, "step": 5790 }, { "epoch": 4.011776931070315, "grad_norm": 0.4111827313899994, "learning_rate": 5.990984743411928e-06, "loss": 0.012, "step": 5791 }, { "epoch": 4.012469691721511, "grad_norm": 0.47101888060569763, "learning_rate": 5.990291262135923e-06, "loss": 0.022, "step": 5792 }, { "epoch": 4.013162452372705, "grad_norm": 0.666670024394989, "learning_rate": 5.989597780859917e-06, "loss": 0.0148, "step": 5793 }, { "epoch": 4.0138552130239, "grad_norm": 0.3970896899700165, "learning_rate": 5.988904299583912e-06, "loss": 0.0146, "step": 5794 }, { "epoch": 4.0145479736750955, "grad_norm": 0.5917661190032959, "learning_rate": 5.9882108183079055e-06, "loss": 0.0098, "step": 5795 }, { "epoch": 4.01524073432629, "grad_norm": 0.5205355286598206, "learning_rate": 5.9875173370319005e-06, "loss": 0.0146, "step": 5796 }, { "epoch": 4.015933494977485, "grad_norm": 0.4315672218799591, "learning_rate": 5.986823855755895e-06, "loss": 0.0136, "step": 5797 }, { "epoch": 4.0166262556286805, "grad_norm": 0.47964319586753845, "learning_rate": 5.9861303744798895e-06, "loss": 0.0163, "step": 5798 }, { "epoch": 4.017319016279875, "grad_norm": 0.35959741473197937, "learning_rate": 5.9854368932038844e-06, "loss": 0.0127, "step": 5799 }, { "epoch": 4.01801177693107, "grad_norm": 0.43572506308555603, "learning_rate": 5.984743411927878e-06, "loss": 0.0159, "step": 5800 }, { "epoch": 4.018704537582265, "grad_norm": 0.3837737441062927, "learning_rate": 5.984049930651873e-06, "loss": 0.0132, "step": 5801 }, { "epoch": 4.019397298233461, "grad_norm": 0.4511842727661133, "learning_rate": 5.9833564493758675e-06, "loss": 0.0134, "step": 5802 }, { "epoch": 4.020090058884655, "grad_norm": 0.40162840485572815, "learning_rate": 5.982662968099862e-06, "loss": 0.0171, "step": 5803 }, { "epoch": 4.02078281953585, "grad_norm": 0.4260197579860687, "learning_rate": 5.9819694868238566e-06, "loss": 0.0169, "step": 5804 }, { "epoch": 4.021475580187046, "grad_norm": 0.4432424008846283, "learning_rate": 5.981276005547851e-06, "loss": 0.0156, "step": 5805 }, { "epoch": 4.02216834083824, "grad_norm": 0.5178232789039612, "learning_rate": 5.980582524271845e-06, "loss": 0.0177, "step": 5806 }, { "epoch": 4.022861101489435, "grad_norm": 0.3381485044956207, "learning_rate": 5.97988904299584e-06, "loss": 0.0147, "step": 5807 }, { "epoch": 4.023553862140631, "grad_norm": 0.54864102602005, "learning_rate": 5.979195561719834e-06, "loss": 0.018, "step": 5808 }, { "epoch": 4.024246622791825, "grad_norm": 0.36553627252578735, "learning_rate": 5.978502080443829e-06, "loss": 0.0157, "step": 5809 }, { "epoch": 4.02493938344302, "grad_norm": 0.45925456285476685, "learning_rate": 5.977808599167823e-06, "loss": 0.018, "step": 5810 }, { "epoch": 4.0256321440942155, "grad_norm": 0.5858983993530273, "learning_rate": 5.977115117891818e-06, "loss": 0.0176, "step": 5811 }, { "epoch": 4.026324904745411, "grad_norm": 0.40633654594421387, "learning_rate": 5.976421636615813e-06, "loss": 0.0163, "step": 5812 }, { "epoch": 4.027017665396605, "grad_norm": 0.3767582178115845, "learning_rate": 5.975728155339806e-06, "loss": 0.013, "step": 5813 }, { "epoch": 4.0277104260478005, "grad_norm": 0.410575807094574, "learning_rate": 5.975034674063801e-06, "loss": 0.0146, "step": 5814 }, { "epoch": 4.028403186698996, "grad_norm": 0.48538967967033386, "learning_rate": 5.974341192787795e-06, "loss": 0.0128, "step": 5815 }, { "epoch": 4.02909594735019, "grad_norm": 0.5136532783508301, "learning_rate": 5.97364771151179e-06, "loss": 0.0182, "step": 5816 }, { "epoch": 4.0297887080013854, "grad_norm": 0.6098598837852478, "learning_rate": 5.972954230235785e-06, "loss": 0.0155, "step": 5817 }, { "epoch": 4.030481468652581, "grad_norm": 0.46291500329971313, "learning_rate": 5.972260748959778e-06, "loss": 0.0182, "step": 5818 }, { "epoch": 4.031174229303775, "grad_norm": 0.3335826098918915, "learning_rate": 5.971567267683773e-06, "loss": 0.01, "step": 5819 }, { "epoch": 4.03186698995497, "grad_norm": 0.44913986325263977, "learning_rate": 5.970873786407767e-06, "loss": 0.0137, "step": 5820 }, { "epoch": 4.032559750606166, "grad_norm": 0.5580949187278748, "learning_rate": 5.970180305131762e-06, "loss": 0.0193, "step": 5821 }, { "epoch": 4.033252511257361, "grad_norm": 0.6049192547798157, "learning_rate": 5.969486823855757e-06, "loss": 0.0186, "step": 5822 }, { "epoch": 4.033945271908555, "grad_norm": 0.46268901228904724, "learning_rate": 5.968793342579751e-06, "loss": 0.0119, "step": 5823 }, { "epoch": 4.034638032559751, "grad_norm": 0.5266537666320801, "learning_rate": 5.968099861303746e-06, "loss": 0.015, "step": 5824 }, { "epoch": 4.035330793210946, "grad_norm": 0.5469845533370972, "learning_rate": 5.967406380027739e-06, "loss": 0.0236, "step": 5825 }, { "epoch": 4.03602355386214, "grad_norm": 0.38151949644088745, "learning_rate": 5.966712898751734e-06, "loss": 0.0137, "step": 5826 }, { "epoch": 4.036716314513336, "grad_norm": 0.41305768489837646, "learning_rate": 5.966019417475729e-06, "loss": 0.0113, "step": 5827 }, { "epoch": 4.037409075164531, "grad_norm": 0.41235440969467163, "learning_rate": 5.965325936199723e-06, "loss": 0.0156, "step": 5828 }, { "epoch": 4.038101835815725, "grad_norm": 0.713265597820282, "learning_rate": 5.964632454923718e-06, "loss": 0.0226, "step": 5829 }, { "epoch": 4.0387945964669205, "grad_norm": 0.6062613129615784, "learning_rate": 5.963938973647711e-06, "loss": 0.025, "step": 5830 }, { "epoch": 4.039487357118116, "grad_norm": 0.7014955282211304, "learning_rate": 5.963245492371706e-06, "loss": 0.0182, "step": 5831 }, { "epoch": 4.040180117769311, "grad_norm": 0.534320592880249, "learning_rate": 5.962552011095701e-06, "loss": 0.0194, "step": 5832 }, { "epoch": 4.0408728784205055, "grad_norm": 0.3863247036933899, "learning_rate": 5.961858529819695e-06, "loss": 0.0105, "step": 5833 }, { "epoch": 4.041565639071701, "grad_norm": 0.5409172177314758, "learning_rate": 5.96116504854369e-06, "loss": 0.0148, "step": 5834 }, { "epoch": 4.042258399722896, "grad_norm": 0.4131470024585724, "learning_rate": 5.960471567267684e-06, "loss": 0.0147, "step": 5835 }, { "epoch": 4.04295116037409, "grad_norm": 0.46925875544548035, "learning_rate": 5.959778085991678e-06, "loss": 0.0175, "step": 5836 }, { "epoch": 4.043643921025286, "grad_norm": 0.49055975675582886, "learning_rate": 5.959084604715673e-06, "loss": 0.0113, "step": 5837 }, { "epoch": 4.044336681676481, "grad_norm": 0.4608907103538513, "learning_rate": 5.958391123439667e-06, "loss": 0.0121, "step": 5838 }, { "epoch": 4.045029442327675, "grad_norm": 0.41653043031692505, "learning_rate": 5.957697642163662e-06, "loss": 0.0171, "step": 5839 }, { "epoch": 4.045722202978871, "grad_norm": 0.45659032464027405, "learning_rate": 5.957004160887656e-06, "loss": 0.0184, "step": 5840 }, { "epoch": 4.046414963630066, "grad_norm": 0.43281108140945435, "learning_rate": 5.956310679611651e-06, "loss": 0.0146, "step": 5841 }, { "epoch": 4.047107724281261, "grad_norm": 0.3644174039363861, "learning_rate": 5.955617198335646e-06, "loss": 0.0115, "step": 5842 }, { "epoch": 4.047800484932456, "grad_norm": 0.6167822480201721, "learning_rate": 5.9549237170596395e-06, "loss": 0.0189, "step": 5843 }, { "epoch": 4.048493245583651, "grad_norm": 0.48649120330810547, "learning_rate": 5.954230235783634e-06, "loss": 0.0189, "step": 5844 }, { "epoch": 4.049186006234846, "grad_norm": 0.5015866756439209, "learning_rate": 5.9535367545076285e-06, "loss": 0.0223, "step": 5845 }, { "epoch": 4.0498787668860405, "grad_norm": 0.44310736656188965, "learning_rate": 5.9528432732316234e-06, "loss": 0.0129, "step": 5846 }, { "epoch": 4.050571527537236, "grad_norm": 0.3944130837917328, "learning_rate": 5.952149791955618e-06, "loss": 0.0159, "step": 5847 }, { "epoch": 4.051264288188431, "grad_norm": 0.6501338481903076, "learning_rate": 5.951456310679612e-06, "loss": 0.0177, "step": 5848 }, { "epoch": 4.0519570488396255, "grad_norm": 0.44322100281715393, "learning_rate": 5.9507628294036065e-06, "loss": 0.0107, "step": 5849 }, { "epoch": 4.052649809490821, "grad_norm": 0.4589598774909973, "learning_rate": 5.950069348127601e-06, "loss": 0.015, "step": 5850 }, { "epoch": 4.053342570142016, "grad_norm": 0.5213820934295654, "learning_rate": 5.9493758668515956e-06, "loss": 0.0134, "step": 5851 }, { "epoch": 4.054035330793211, "grad_norm": 0.4292052388191223, "learning_rate": 5.9486823855755905e-06, "loss": 0.0139, "step": 5852 }, { "epoch": 4.054728091444406, "grad_norm": 0.3794039189815521, "learning_rate": 5.947988904299585e-06, "loss": 0.0163, "step": 5853 }, { "epoch": 4.055420852095601, "grad_norm": 0.3504757285118103, "learning_rate": 5.9472954230235795e-06, "loss": 0.0111, "step": 5854 }, { "epoch": 4.056113612746796, "grad_norm": 0.29840224981307983, "learning_rate": 5.946601941747573e-06, "loss": 0.0098, "step": 5855 }, { "epoch": 4.056806373397991, "grad_norm": 0.565812885761261, "learning_rate": 5.945908460471568e-06, "loss": 0.0189, "step": 5856 }, { "epoch": 4.057499134049186, "grad_norm": 0.3221192955970764, "learning_rate": 5.945214979195563e-06, "loss": 0.0116, "step": 5857 }, { "epoch": 4.058191894700381, "grad_norm": 0.5720316767692566, "learning_rate": 5.944521497919557e-06, "loss": 0.0142, "step": 5858 }, { "epoch": 4.058884655351576, "grad_norm": 0.5223294496536255, "learning_rate": 5.943828016643552e-06, "loss": 0.0197, "step": 5859 }, { "epoch": 4.059577416002771, "grad_norm": 0.3791462779045105, "learning_rate": 5.943134535367545e-06, "loss": 0.0153, "step": 5860 }, { "epoch": 4.060270176653966, "grad_norm": 0.3745465576648712, "learning_rate": 5.94244105409154e-06, "loss": 0.0129, "step": 5861 }, { "epoch": 4.0609629373051614, "grad_norm": 0.46460044384002686, "learning_rate": 5.941747572815535e-06, "loss": 0.0129, "step": 5862 }, { "epoch": 4.061655697956356, "grad_norm": 0.40892907977104187, "learning_rate": 5.941054091539529e-06, "loss": 0.0159, "step": 5863 }, { "epoch": 4.062348458607551, "grad_norm": 0.41356760263442993, "learning_rate": 5.940360610263524e-06, "loss": 0.0145, "step": 5864 }, { "epoch": 4.063041219258746, "grad_norm": 0.43089690804481506, "learning_rate": 5.939667128987517e-06, "loss": 0.0185, "step": 5865 }, { "epoch": 4.063733979909941, "grad_norm": 0.36338141560554504, "learning_rate": 5.938973647711512e-06, "loss": 0.0134, "step": 5866 }, { "epoch": 4.064426740561136, "grad_norm": 0.5414767265319824, "learning_rate": 5.938280166435507e-06, "loss": 0.0176, "step": 5867 }, { "epoch": 4.065119501212331, "grad_norm": 0.6014214158058167, "learning_rate": 5.937586685159501e-06, "loss": 0.018, "step": 5868 }, { "epoch": 4.065812261863526, "grad_norm": 0.4352724254131317, "learning_rate": 5.936893203883496e-06, "loss": 0.0147, "step": 5869 }, { "epoch": 4.066505022514721, "grad_norm": 0.45042163133621216, "learning_rate": 5.93619972260749e-06, "loss": 0.0153, "step": 5870 }, { "epoch": 4.067197783165916, "grad_norm": 0.39651691913604736, "learning_rate": 5.935506241331485e-06, "loss": 0.0122, "step": 5871 }, { "epoch": 4.067890543817112, "grad_norm": 0.5998177528381348, "learning_rate": 5.93481276005548e-06, "loss": 0.0136, "step": 5872 }, { "epoch": 4.068583304468306, "grad_norm": 0.46614235639572144, "learning_rate": 5.934119278779473e-06, "loss": 0.0132, "step": 5873 }, { "epoch": 4.069276065119501, "grad_norm": 0.3535293638706207, "learning_rate": 5.933425797503468e-06, "loss": 0.0116, "step": 5874 }, { "epoch": 4.0699688257706965, "grad_norm": 0.45569026470184326, "learning_rate": 5.932732316227462e-06, "loss": 0.0139, "step": 5875 }, { "epoch": 4.070661586421891, "grad_norm": 0.4665956497192383, "learning_rate": 5.932038834951457e-06, "loss": 0.0161, "step": 5876 }, { "epoch": 4.071354347073086, "grad_norm": 0.4282681345939636, "learning_rate": 5.931345353675452e-06, "loss": 0.0152, "step": 5877 }, { "epoch": 4.0720471077242815, "grad_norm": 0.48855772614479065, "learning_rate": 5.930651872399445e-06, "loss": 0.0133, "step": 5878 }, { "epoch": 4.072739868375476, "grad_norm": 0.44515877962112427, "learning_rate": 5.92995839112344e-06, "loss": 0.0164, "step": 5879 }, { "epoch": 4.073432629026671, "grad_norm": 0.5303168892860413, "learning_rate": 5.929264909847434e-06, "loss": 0.0168, "step": 5880 }, { "epoch": 4.074125389677866, "grad_norm": 0.3862502872943878, "learning_rate": 5.928571428571429e-06, "loss": 0.0105, "step": 5881 }, { "epoch": 4.074818150329062, "grad_norm": 0.3936847150325775, "learning_rate": 5.927877947295424e-06, "loss": 0.0157, "step": 5882 }, { "epoch": 4.075510910980256, "grad_norm": 0.4762272238731384, "learning_rate": 5.927184466019418e-06, "loss": 0.0137, "step": 5883 }, { "epoch": 4.076203671631451, "grad_norm": 0.5180473923683167, "learning_rate": 5.926490984743412e-06, "loss": 0.0229, "step": 5884 }, { "epoch": 4.076896432282647, "grad_norm": 0.40103399753570557, "learning_rate": 5.925797503467406e-06, "loss": 0.0099, "step": 5885 }, { "epoch": 4.077589192933841, "grad_norm": 0.5800018310546875, "learning_rate": 5.925104022191401e-06, "loss": 0.0117, "step": 5886 }, { "epoch": 4.078281953585036, "grad_norm": 0.39674970507621765, "learning_rate": 5.924410540915396e-06, "loss": 0.0156, "step": 5887 }, { "epoch": 4.078974714236232, "grad_norm": 0.43166518211364746, "learning_rate": 5.92371705963939e-06, "loss": 0.0113, "step": 5888 }, { "epoch": 4.079667474887426, "grad_norm": 0.5330915451049805, "learning_rate": 5.923023578363385e-06, "loss": 0.0142, "step": 5889 }, { "epoch": 4.080360235538621, "grad_norm": 0.40920907258987427, "learning_rate": 5.9223300970873785e-06, "loss": 0.012, "step": 5890 }, { "epoch": 4.0810529961898165, "grad_norm": 0.3786559998989105, "learning_rate": 5.921636615811373e-06, "loss": 0.0141, "step": 5891 }, { "epoch": 4.081745756841012, "grad_norm": 0.4738898277282715, "learning_rate": 5.920943134535368e-06, "loss": 0.013, "step": 5892 }, { "epoch": 4.082438517492206, "grad_norm": 0.3698556125164032, "learning_rate": 5.9202496532593624e-06, "loss": 0.0139, "step": 5893 }, { "epoch": 4.0831312781434015, "grad_norm": 0.4169827401638031, "learning_rate": 5.919556171983357e-06, "loss": 0.0156, "step": 5894 }, { "epoch": 4.083824038794597, "grad_norm": 0.3827657401561737, "learning_rate": 5.918862690707351e-06, "loss": 0.0101, "step": 5895 }, { "epoch": 4.084516799445791, "grad_norm": 0.4681031107902527, "learning_rate": 5.9181692094313455e-06, "loss": 0.0175, "step": 5896 }, { "epoch": 4.085209560096986, "grad_norm": 0.6041281819343567, "learning_rate": 5.9174757281553405e-06, "loss": 0.0163, "step": 5897 }, { "epoch": 4.085902320748182, "grad_norm": 0.44216060638427734, "learning_rate": 5.9167822468793346e-06, "loss": 0.0158, "step": 5898 }, { "epoch": 4.086595081399376, "grad_norm": 0.3780629634857178, "learning_rate": 5.9160887656033295e-06, "loss": 0.0109, "step": 5899 }, { "epoch": 4.087287842050571, "grad_norm": 0.3870496153831482, "learning_rate": 5.915395284327324e-06, "loss": 0.0169, "step": 5900 }, { "epoch": 4.087980602701767, "grad_norm": 0.4418138563632965, "learning_rate": 5.9147018030513185e-06, "loss": 0.0129, "step": 5901 }, { "epoch": 4.088673363352962, "grad_norm": 0.3675566017627716, "learning_rate": 5.9140083217753134e-06, "loss": 0.0138, "step": 5902 }, { "epoch": 4.089366124004156, "grad_norm": 0.691497266292572, "learning_rate": 5.913314840499307e-06, "loss": 0.016, "step": 5903 }, { "epoch": 4.090058884655352, "grad_norm": 0.4711749255657196, "learning_rate": 5.912621359223302e-06, "loss": 0.0209, "step": 5904 }, { "epoch": 4.090751645306547, "grad_norm": 0.3874192535877228, "learning_rate": 5.911927877947296e-06, "loss": 0.0104, "step": 5905 }, { "epoch": 4.091444405957741, "grad_norm": 0.42109841108322144, "learning_rate": 5.911234396671291e-06, "loss": 0.0173, "step": 5906 }, { "epoch": 4.092137166608937, "grad_norm": 0.3346777558326721, "learning_rate": 5.9105409153952856e-06, "loss": 0.0113, "step": 5907 }, { "epoch": 4.092829927260132, "grad_norm": 0.41182124614715576, "learning_rate": 5.909847434119279e-06, "loss": 0.0149, "step": 5908 }, { "epoch": 4.093522687911326, "grad_norm": 0.36244627833366394, "learning_rate": 5.909153952843274e-06, "loss": 0.0123, "step": 5909 }, { "epoch": 4.0942154485625215, "grad_norm": 0.4372624158859253, "learning_rate": 5.908460471567268e-06, "loss": 0.0126, "step": 5910 }, { "epoch": 4.094908209213717, "grad_norm": 0.3828391432762146, "learning_rate": 5.907766990291263e-06, "loss": 0.0143, "step": 5911 }, { "epoch": 4.095600969864912, "grad_norm": 0.45144957304000854, "learning_rate": 5.907073509015258e-06, "loss": 0.0115, "step": 5912 }, { "epoch": 4.0962937305161065, "grad_norm": 0.5366044044494629, "learning_rate": 5.906380027739251e-06, "loss": 0.0178, "step": 5913 }, { "epoch": 4.096986491167302, "grad_norm": 0.37477147579193115, "learning_rate": 5.905686546463246e-06, "loss": 0.0153, "step": 5914 }, { "epoch": 4.097679251818497, "grad_norm": 0.42522063851356506, "learning_rate": 5.90499306518724e-06, "loss": 0.0128, "step": 5915 }, { "epoch": 4.098372012469691, "grad_norm": 0.41694512963294983, "learning_rate": 5.904299583911235e-06, "loss": 0.0111, "step": 5916 }, { "epoch": 4.099064773120887, "grad_norm": 0.5273397564888, "learning_rate": 5.90360610263523e-06, "loss": 0.0165, "step": 5917 }, { "epoch": 4.099757533772082, "grad_norm": 0.5179102420806885, "learning_rate": 5.902912621359224e-06, "loss": 0.0193, "step": 5918 }, { "epoch": 4.100450294423276, "grad_norm": 0.5258399844169617, "learning_rate": 5.902219140083219e-06, "loss": 0.0214, "step": 5919 }, { "epoch": 4.101143055074472, "grad_norm": 0.43828997015953064, "learning_rate": 5.901525658807212e-06, "loss": 0.0153, "step": 5920 }, { "epoch": 4.101835815725667, "grad_norm": 0.35232213139533997, "learning_rate": 5.900832177531207e-06, "loss": 0.011, "step": 5921 }, { "epoch": 4.102528576376862, "grad_norm": 0.43267562985420227, "learning_rate": 5.900138696255202e-06, "loss": 0.0159, "step": 5922 }, { "epoch": 4.103221337028057, "grad_norm": 0.4062022566795349, "learning_rate": 5.899445214979196e-06, "loss": 0.0197, "step": 5923 }, { "epoch": 4.103914097679252, "grad_norm": 0.3769342005252838, "learning_rate": 5.898751733703191e-06, "loss": 0.0165, "step": 5924 }, { "epoch": 4.104606858330447, "grad_norm": 0.37129485607147217, "learning_rate": 5.898058252427184e-06, "loss": 0.0132, "step": 5925 }, { "epoch": 4.1052996189816415, "grad_norm": 0.48331621289253235, "learning_rate": 5.897364771151179e-06, "loss": 0.0231, "step": 5926 }, { "epoch": 4.105992379632837, "grad_norm": 0.4839484393596649, "learning_rate": 5.896671289875174e-06, "loss": 0.0174, "step": 5927 }, { "epoch": 4.106685140284032, "grad_norm": 0.47911614179611206, "learning_rate": 5.895977808599168e-06, "loss": 0.0145, "step": 5928 }, { "epoch": 4.1073779009352265, "grad_norm": 0.5056118369102478, "learning_rate": 5.895284327323163e-06, "loss": 0.0174, "step": 5929 }, { "epoch": 4.108070661586422, "grad_norm": 0.6311521530151367, "learning_rate": 5.894590846047157e-06, "loss": 0.0153, "step": 5930 }, { "epoch": 4.108763422237617, "grad_norm": 0.4103589653968811, "learning_rate": 5.893897364771152e-06, "loss": 0.015, "step": 5931 }, { "epoch": 4.109456182888812, "grad_norm": 0.3977261781692505, "learning_rate": 5.893203883495146e-06, "loss": 0.0172, "step": 5932 }, { "epoch": 4.110148943540007, "grad_norm": 0.6271304488182068, "learning_rate": 5.89251040221914e-06, "loss": 0.0177, "step": 5933 }, { "epoch": 4.110841704191202, "grad_norm": 0.3656662106513977, "learning_rate": 5.891816920943135e-06, "loss": 0.0122, "step": 5934 }, { "epoch": 4.111534464842397, "grad_norm": 0.4223005473613739, "learning_rate": 5.891123439667129e-06, "loss": 0.0159, "step": 5935 }, { "epoch": 4.112227225493592, "grad_norm": 0.38988810777664185, "learning_rate": 5.890429958391124e-06, "loss": 0.0112, "step": 5936 }, { "epoch": 4.112919986144787, "grad_norm": 0.4237985908985138, "learning_rate": 5.889736477115119e-06, "loss": 0.0155, "step": 5937 }, { "epoch": 4.113612746795982, "grad_norm": 0.4080210030078888, "learning_rate": 5.889042995839112e-06, "loss": 0.0137, "step": 5938 }, { "epoch": 4.114305507447177, "grad_norm": 0.5066666007041931, "learning_rate": 5.888349514563107e-06, "loss": 0.0231, "step": 5939 }, { "epoch": 4.114998268098372, "grad_norm": 0.3873167932033539, "learning_rate": 5.8876560332871014e-06, "loss": 0.0169, "step": 5940 }, { "epoch": 4.115691028749567, "grad_norm": 0.3531154692173004, "learning_rate": 5.886962552011096e-06, "loss": 0.0123, "step": 5941 }, { "epoch": 4.116383789400762, "grad_norm": 0.4736908972263336, "learning_rate": 5.886269070735091e-06, "loss": 0.0173, "step": 5942 }, { "epoch": 4.117076550051957, "grad_norm": 0.500149667263031, "learning_rate": 5.8855755894590845e-06, "loss": 0.0138, "step": 5943 }, { "epoch": 4.117769310703152, "grad_norm": 0.43999993801116943, "learning_rate": 5.8848821081830795e-06, "loss": 0.0157, "step": 5944 }, { "epoch": 4.118462071354347, "grad_norm": 0.4465121626853943, "learning_rate": 5.8841886269070736e-06, "loss": 0.0197, "step": 5945 }, { "epoch": 4.119154832005542, "grad_norm": 0.36826783418655396, "learning_rate": 5.8834951456310685e-06, "loss": 0.0118, "step": 5946 }, { "epoch": 4.119847592656737, "grad_norm": 0.3605504631996155, "learning_rate": 5.8828016643550634e-06, "loss": 0.012, "step": 5947 }, { "epoch": 4.120540353307932, "grad_norm": 0.4197753667831421, "learning_rate": 5.8821081830790575e-06, "loss": 0.0152, "step": 5948 }, { "epoch": 4.121233113959127, "grad_norm": 0.40580058097839355, "learning_rate": 5.8814147018030524e-06, "loss": 0.0148, "step": 5949 }, { "epoch": 4.121925874610322, "grad_norm": 0.46573224663734436, "learning_rate": 5.880721220527046e-06, "loss": 0.0114, "step": 5950 }, { "epoch": 4.122618635261517, "grad_norm": 0.49255266785621643, "learning_rate": 5.880027739251041e-06, "loss": 0.013, "step": 5951 }, { "epoch": 4.123311395912713, "grad_norm": 0.500079333782196, "learning_rate": 5.8793342579750356e-06, "loss": 0.0163, "step": 5952 }, { "epoch": 4.124004156563907, "grad_norm": 0.3782138526439667, "learning_rate": 5.87864077669903e-06, "loss": 0.0145, "step": 5953 }, { "epoch": 4.124696917215102, "grad_norm": 0.6033086776733398, "learning_rate": 5.8779472954230246e-06, "loss": 0.0214, "step": 5954 }, { "epoch": 4.1253896778662975, "grad_norm": 0.5711798667907715, "learning_rate": 5.877253814147018e-06, "loss": 0.0181, "step": 5955 }, { "epoch": 4.126082438517492, "grad_norm": 0.4918570816516876, "learning_rate": 5.876560332871013e-06, "loss": 0.019, "step": 5956 }, { "epoch": 4.126775199168687, "grad_norm": 0.4438480734825134, "learning_rate": 5.875866851595008e-06, "loss": 0.0181, "step": 5957 }, { "epoch": 4.1274679598198825, "grad_norm": 0.5217729210853577, "learning_rate": 5.875173370319002e-06, "loss": 0.0111, "step": 5958 }, { "epoch": 4.128160720471077, "grad_norm": 0.48910391330718994, "learning_rate": 5.874479889042997e-06, "loss": 0.0181, "step": 5959 }, { "epoch": 4.128853481122272, "grad_norm": 0.42340871691703796, "learning_rate": 5.873786407766991e-06, "loss": 0.0141, "step": 5960 }, { "epoch": 4.129546241773467, "grad_norm": 0.39708080887794495, "learning_rate": 5.873092926490985e-06, "loss": 0.0133, "step": 5961 }, { "epoch": 4.130239002424663, "grad_norm": 1.0382578372955322, "learning_rate": 5.87239944521498e-06, "loss": 0.0222, "step": 5962 }, { "epoch": 4.130931763075857, "grad_norm": 0.428201287984848, "learning_rate": 5.871705963938974e-06, "loss": 0.0126, "step": 5963 }, { "epoch": 4.131624523727052, "grad_norm": 0.34670788049697876, "learning_rate": 5.871012482662969e-06, "loss": 0.0135, "step": 5964 }, { "epoch": 4.132317284378248, "grad_norm": 0.4749477505683899, "learning_rate": 5.870319001386963e-06, "loss": 0.012, "step": 5965 }, { "epoch": 4.133010045029442, "grad_norm": 0.6095417141914368, "learning_rate": 5.869625520110958e-06, "loss": 0.0234, "step": 5966 }, { "epoch": 4.133702805680637, "grad_norm": 0.4395025372505188, "learning_rate": 5.868932038834953e-06, "loss": 0.0172, "step": 5967 }, { "epoch": 4.134395566331833, "grad_norm": 0.4887484014034271, "learning_rate": 5.868238557558946e-06, "loss": 0.0198, "step": 5968 }, { "epoch": 4.135088326983027, "grad_norm": 0.3763115406036377, "learning_rate": 5.867545076282941e-06, "loss": 0.0164, "step": 5969 }, { "epoch": 4.135781087634222, "grad_norm": 0.3768296241760254, "learning_rate": 5.866851595006935e-06, "loss": 0.0129, "step": 5970 }, { "epoch": 4.1364738482854175, "grad_norm": 1.0978857278823853, "learning_rate": 5.86615811373093e-06, "loss": 0.0154, "step": 5971 }, { "epoch": 4.137166608936613, "grad_norm": 0.5517410039901733, "learning_rate": 5.865464632454925e-06, "loss": 0.0157, "step": 5972 }, { "epoch": 4.137859369587807, "grad_norm": 0.4258527159690857, "learning_rate": 5.864771151178918e-06, "loss": 0.0096, "step": 5973 }, { "epoch": 4.1385521302390025, "grad_norm": 0.501067578792572, "learning_rate": 5.864077669902913e-06, "loss": 0.011, "step": 5974 }, { "epoch": 4.139244890890198, "grad_norm": 0.42069071531295776, "learning_rate": 5.863384188626907e-06, "loss": 0.012, "step": 5975 }, { "epoch": 4.139937651541392, "grad_norm": 0.4824422001838684, "learning_rate": 5.862690707350902e-06, "loss": 0.0173, "step": 5976 }, { "epoch": 4.140630412192587, "grad_norm": 0.46342602372169495, "learning_rate": 5.861997226074897e-06, "loss": 0.0163, "step": 5977 }, { "epoch": 4.141323172843783, "grad_norm": 0.3886893689632416, "learning_rate": 5.861303744798891e-06, "loss": 0.0111, "step": 5978 }, { "epoch": 4.142015933494977, "grad_norm": 0.5579688549041748, "learning_rate": 5.860610263522886e-06, "loss": 0.0156, "step": 5979 }, { "epoch": 4.142708694146172, "grad_norm": 0.434151291847229, "learning_rate": 5.859916782246879e-06, "loss": 0.0139, "step": 5980 }, { "epoch": 4.143401454797368, "grad_norm": 0.42820703983306885, "learning_rate": 5.859223300970874e-06, "loss": 0.0165, "step": 5981 }, { "epoch": 4.144094215448563, "grad_norm": 0.3862599730491638, "learning_rate": 5.858529819694869e-06, "loss": 0.0112, "step": 5982 }, { "epoch": 4.144786976099757, "grad_norm": 0.5400574803352356, "learning_rate": 5.857836338418863e-06, "loss": 0.0122, "step": 5983 }, { "epoch": 4.145479736750953, "grad_norm": 0.47512558102607727, "learning_rate": 5.857142857142858e-06, "loss": 0.0181, "step": 5984 }, { "epoch": 4.146172497402148, "grad_norm": 0.4970700442790985, "learning_rate": 5.856449375866851e-06, "loss": 0.0146, "step": 5985 }, { "epoch": 4.146865258053342, "grad_norm": 0.40132901072502136, "learning_rate": 5.855755894590846e-06, "loss": 0.0126, "step": 5986 }, { "epoch": 4.1475580187045376, "grad_norm": 0.4725714921951294, "learning_rate": 5.855062413314841e-06, "loss": 0.015, "step": 5987 }, { "epoch": 4.148250779355733, "grad_norm": 0.397342324256897, "learning_rate": 5.854368932038835e-06, "loss": 0.019, "step": 5988 }, { "epoch": 4.148943540006927, "grad_norm": 0.42282554507255554, "learning_rate": 5.85367545076283e-06, "loss": 0.0112, "step": 5989 }, { "epoch": 4.1496363006581225, "grad_norm": 0.43358930945396423, "learning_rate": 5.8529819694868235e-06, "loss": 0.0139, "step": 5990 }, { "epoch": 4.150329061309318, "grad_norm": 0.45167842507362366, "learning_rate": 5.8522884882108185e-06, "loss": 0.0209, "step": 5991 }, { "epoch": 4.151021821960513, "grad_norm": 0.4032903015613556, "learning_rate": 5.851595006934813e-06, "loss": 0.0144, "step": 5992 }, { "epoch": 4.1517145826117074, "grad_norm": 0.35296830534935, "learning_rate": 5.8509015256588075e-06, "loss": 0.0112, "step": 5993 }, { "epoch": 4.152407343262903, "grad_norm": 0.5063374042510986, "learning_rate": 5.8502080443828024e-06, "loss": 0.0219, "step": 5994 }, { "epoch": 4.153100103914098, "grad_norm": 0.4046128988265991, "learning_rate": 5.8495145631067965e-06, "loss": 0.0175, "step": 5995 }, { "epoch": 4.153792864565292, "grad_norm": 0.630039632320404, "learning_rate": 5.8488210818307915e-06, "loss": 0.017, "step": 5996 }, { "epoch": 4.154485625216488, "grad_norm": 0.4538635015487671, "learning_rate": 5.848127600554786e-06, "loss": 0.013, "step": 5997 }, { "epoch": 4.155178385867683, "grad_norm": 0.40618401765823364, "learning_rate": 5.84743411927878e-06, "loss": 0.0129, "step": 5998 }, { "epoch": 4.155871146518877, "grad_norm": 0.4100301265716553, "learning_rate": 5.8467406380027746e-06, "loss": 0.0168, "step": 5999 }, { "epoch": 4.156563907170073, "grad_norm": 0.41093727946281433, "learning_rate": 5.846047156726769e-06, "loss": 0.0088, "step": 6000 }, { "epoch": 4.157256667821268, "grad_norm": 0.4890499711036682, "learning_rate": 5.8453536754507636e-06, "loss": 0.0145, "step": 6001 }, { "epoch": 4.157949428472463, "grad_norm": 0.5121679306030273, "learning_rate": 5.8446601941747585e-06, "loss": 0.0153, "step": 6002 }, { "epoch": 4.158642189123658, "grad_norm": 0.36372610926628113, "learning_rate": 5.843966712898752e-06, "loss": 0.0101, "step": 6003 }, { "epoch": 4.159334949774853, "grad_norm": 0.4928336441516876, "learning_rate": 5.843273231622747e-06, "loss": 0.018, "step": 6004 }, { "epoch": 4.160027710426048, "grad_norm": 0.3554008901119232, "learning_rate": 5.842579750346741e-06, "loss": 0.0109, "step": 6005 }, { "epoch": 4.1607204710772425, "grad_norm": 0.6662534475326538, "learning_rate": 5.841886269070736e-06, "loss": 0.0213, "step": 6006 }, { "epoch": 4.161413231728438, "grad_norm": 0.4709452986717224, "learning_rate": 5.841192787794731e-06, "loss": 0.0133, "step": 6007 }, { "epoch": 4.162105992379633, "grad_norm": 0.4719048738479614, "learning_rate": 5.840499306518725e-06, "loss": 0.0189, "step": 6008 }, { "epoch": 4.1627987530308275, "grad_norm": 0.649800181388855, "learning_rate": 5.839805825242719e-06, "loss": 0.0162, "step": 6009 }, { "epoch": 4.163491513682023, "grad_norm": 0.459963858127594, "learning_rate": 5.839112343966713e-06, "loss": 0.018, "step": 6010 }, { "epoch": 4.164184274333218, "grad_norm": 0.5588138103485107, "learning_rate": 5.838418862690708e-06, "loss": 0.0122, "step": 6011 }, { "epoch": 4.164877034984413, "grad_norm": 0.43948790431022644, "learning_rate": 5.837725381414703e-06, "loss": 0.014, "step": 6012 }, { "epoch": 4.165569795635608, "grad_norm": 0.38377538323402405, "learning_rate": 5.837031900138697e-06, "loss": 0.0117, "step": 6013 }, { "epoch": 4.166262556286803, "grad_norm": 0.4332597851753235, "learning_rate": 5.836338418862692e-06, "loss": 0.0123, "step": 6014 }, { "epoch": 4.166955316937998, "grad_norm": 0.3645802140235901, "learning_rate": 5.835644937586685e-06, "loss": 0.0127, "step": 6015 }, { "epoch": 4.167648077589193, "grad_norm": 0.42436033487319946, "learning_rate": 5.83495145631068e-06, "loss": 0.0122, "step": 6016 }, { "epoch": 4.168340838240388, "grad_norm": 0.40074238181114197, "learning_rate": 5.834257975034675e-06, "loss": 0.014, "step": 6017 }, { "epoch": 4.169033598891583, "grad_norm": 0.49794983863830566, "learning_rate": 5.833564493758669e-06, "loss": 0.0135, "step": 6018 }, { "epoch": 4.169726359542778, "grad_norm": 0.46749451756477356, "learning_rate": 5.832871012482664e-06, "loss": 0.0213, "step": 6019 }, { "epoch": 4.170419120193973, "grad_norm": 0.5884116888046265, "learning_rate": 5.832177531206657e-06, "loss": 0.0187, "step": 6020 }, { "epoch": 4.171111880845168, "grad_norm": 0.4522908926010132, "learning_rate": 5.831484049930652e-06, "loss": 0.0157, "step": 6021 }, { "epoch": 4.171804641496363, "grad_norm": 0.621612012386322, "learning_rate": 5.830790568654647e-06, "loss": 0.0304, "step": 6022 }, { "epoch": 4.172497402147558, "grad_norm": 0.3476214110851288, "learning_rate": 5.830097087378641e-06, "loss": 0.0128, "step": 6023 }, { "epoch": 4.173190162798753, "grad_norm": 0.47607842087745667, "learning_rate": 5.829403606102636e-06, "loss": 0.0165, "step": 6024 }, { "epoch": 4.173882923449948, "grad_norm": 0.4754858911037445, "learning_rate": 5.82871012482663e-06, "loss": 0.0184, "step": 6025 }, { "epoch": 4.174575684101143, "grad_norm": 0.382719486951828, "learning_rate": 5.828016643550625e-06, "loss": 0.0144, "step": 6026 }, { "epoch": 4.175268444752338, "grad_norm": 0.5067079663276672, "learning_rate": 5.82732316227462e-06, "loss": 0.0147, "step": 6027 }, { "epoch": 4.175961205403533, "grad_norm": 0.5615599751472473, "learning_rate": 5.826629680998613e-06, "loss": 0.0251, "step": 6028 }, { "epoch": 4.176653966054728, "grad_norm": 0.4316640794277191, "learning_rate": 5.825936199722608e-06, "loss": 0.0142, "step": 6029 }, { "epoch": 4.177346726705923, "grad_norm": 0.4999231994152069, "learning_rate": 5.825242718446602e-06, "loss": 0.0192, "step": 6030 }, { "epoch": 4.178039487357118, "grad_norm": 0.3746076822280884, "learning_rate": 5.824549237170597e-06, "loss": 0.0118, "step": 6031 }, { "epoch": 4.1787322480083136, "grad_norm": 0.38020339608192444, "learning_rate": 5.823855755894592e-06, "loss": 0.0107, "step": 6032 }, { "epoch": 4.179425008659508, "grad_norm": 0.5295211672782898, "learning_rate": 5.823162274618585e-06, "loss": 0.0189, "step": 6033 }, { "epoch": 4.180117769310703, "grad_norm": 0.6093255281448364, "learning_rate": 5.82246879334258e-06, "loss": 0.0204, "step": 6034 }, { "epoch": 4.1808105299618985, "grad_norm": 0.5836412310600281, "learning_rate": 5.821775312066574e-06, "loss": 0.0197, "step": 6035 }, { "epoch": 4.181503290613093, "grad_norm": 0.4363343119621277, "learning_rate": 5.821081830790569e-06, "loss": 0.014, "step": 6036 }, { "epoch": 4.182196051264288, "grad_norm": 0.43642446398735046, "learning_rate": 5.820388349514564e-06, "loss": 0.017, "step": 6037 }, { "epoch": 4.1828888119154835, "grad_norm": 0.3896452486515045, "learning_rate": 5.8196948682385575e-06, "loss": 0.0134, "step": 6038 }, { "epoch": 4.183581572566678, "grad_norm": 0.3881409168243408, "learning_rate": 5.819001386962552e-06, "loss": 0.0129, "step": 6039 }, { "epoch": 4.184274333217873, "grad_norm": 0.432341605424881, "learning_rate": 5.8183079056865465e-06, "loss": 0.0154, "step": 6040 }, { "epoch": 4.184967093869068, "grad_norm": 0.5767948627471924, "learning_rate": 5.8176144244105414e-06, "loss": 0.0202, "step": 6041 }, { "epoch": 4.185659854520264, "grad_norm": 0.4400050640106201, "learning_rate": 5.816920943134536e-06, "loss": 0.0141, "step": 6042 }, { "epoch": 4.186352615171458, "grad_norm": 0.4426657259464264, "learning_rate": 5.8162274618585305e-06, "loss": 0.016, "step": 6043 }, { "epoch": 4.187045375822653, "grad_norm": 0.43351590633392334, "learning_rate": 5.815533980582525e-06, "loss": 0.0115, "step": 6044 }, { "epoch": 4.187738136473849, "grad_norm": 0.3226017355918884, "learning_rate": 5.814840499306519e-06, "loss": 0.0103, "step": 6045 }, { "epoch": 4.188430897125043, "grad_norm": 0.45365339517593384, "learning_rate": 5.8141470180305136e-06, "loss": 0.0161, "step": 6046 }, { "epoch": 4.189123657776238, "grad_norm": 0.419453501701355, "learning_rate": 5.8134535367545085e-06, "loss": 0.0153, "step": 6047 }, { "epoch": 4.189816418427434, "grad_norm": 0.36882466077804565, "learning_rate": 5.812760055478503e-06, "loss": 0.0124, "step": 6048 }, { "epoch": 4.190509179078628, "grad_norm": 0.4724201261997223, "learning_rate": 5.8120665742024975e-06, "loss": 0.0186, "step": 6049 }, { "epoch": 4.191201939729823, "grad_norm": 0.5134153366088867, "learning_rate": 5.811373092926491e-06, "loss": 0.0129, "step": 6050 }, { "epoch": 4.1918947003810185, "grad_norm": 0.3443831503391266, "learning_rate": 5.810679611650486e-06, "loss": 0.0125, "step": 6051 }, { "epoch": 4.192587461032213, "grad_norm": 0.36510440707206726, "learning_rate": 5.809986130374481e-06, "loss": 0.0118, "step": 6052 }, { "epoch": 4.193280221683408, "grad_norm": 0.4110822081565857, "learning_rate": 5.809292649098475e-06, "loss": 0.0159, "step": 6053 }, { "epoch": 4.1939729823346035, "grad_norm": 0.5295119881629944, "learning_rate": 5.80859916782247e-06, "loss": 0.0112, "step": 6054 }, { "epoch": 4.194665742985799, "grad_norm": 0.44774559140205383, "learning_rate": 5.807905686546464e-06, "loss": 0.0154, "step": 6055 }, { "epoch": 4.195358503636993, "grad_norm": 0.39692816138267517, "learning_rate": 5.807212205270459e-06, "loss": 0.0148, "step": 6056 }, { "epoch": 4.196051264288188, "grad_norm": 0.37895745038986206, "learning_rate": 5.806518723994453e-06, "loss": 0.0123, "step": 6057 }, { "epoch": 4.196744024939384, "grad_norm": 0.4494268298149109, "learning_rate": 5.805825242718447e-06, "loss": 0.0123, "step": 6058 }, { "epoch": 4.197436785590578, "grad_norm": 0.44115379452705383, "learning_rate": 5.805131761442442e-06, "loss": 0.0146, "step": 6059 }, { "epoch": 4.198129546241773, "grad_norm": 0.33701446652412415, "learning_rate": 5.804438280166436e-06, "loss": 0.0119, "step": 6060 }, { "epoch": 4.198822306892969, "grad_norm": 0.4546017646789551, "learning_rate": 5.803744798890431e-06, "loss": 0.0162, "step": 6061 }, { "epoch": 4.199515067544164, "grad_norm": 0.34789028763771057, "learning_rate": 5.803051317614426e-06, "loss": 0.0097, "step": 6062 }, { "epoch": 4.200207828195358, "grad_norm": 0.3898346722126007, "learning_rate": 5.802357836338419e-06, "loss": 0.0097, "step": 6063 }, { "epoch": 4.200900588846554, "grad_norm": 0.49162551760673523, "learning_rate": 5.801664355062414e-06, "loss": 0.0173, "step": 6064 }, { "epoch": 4.201593349497749, "grad_norm": 0.3743383288383484, "learning_rate": 5.800970873786408e-06, "loss": 0.0123, "step": 6065 }, { "epoch": 4.202286110148943, "grad_norm": 0.4847966730594635, "learning_rate": 5.800277392510403e-06, "loss": 0.016, "step": 6066 }, { "epoch": 4.2029788708001385, "grad_norm": 0.5072675347328186, "learning_rate": 5.799583911234398e-06, "loss": 0.0165, "step": 6067 }, { "epoch": 4.203671631451334, "grad_norm": 0.39113467931747437, "learning_rate": 5.798890429958391e-06, "loss": 0.0122, "step": 6068 }, { "epoch": 4.204364392102528, "grad_norm": 0.4836355745792389, "learning_rate": 5.798196948682386e-06, "loss": 0.0209, "step": 6069 }, { "epoch": 4.2050571527537235, "grad_norm": 0.6245148181915283, "learning_rate": 5.79750346740638e-06, "loss": 0.0108, "step": 6070 }, { "epoch": 4.205749913404919, "grad_norm": 0.4833970069885254, "learning_rate": 5.796809986130375e-06, "loss": 0.0161, "step": 6071 }, { "epoch": 4.206442674056113, "grad_norm": 0.5720314979553223, "learning_rate": 5.79611650485437e-06, "loss": 0.0224, "step": 6072 }, { "epoch": 4.207135434707308, "grad_norm": 0.45294252038002014, "learning_rate": 5.795423023578364e-06, "loss": 0.0215, "step": 6073 }, { "epoch": 4.207828195358504, "grad_norm": 0.4205402433872223, "learning_rate": 5.794729542302359e-06, "loss": 0.0145, "step": 6074 }, { "epoch": 4.208520956009699, "grad_norm": 0.48753929138183594, "learning_rate": 5.794036061026352e-06, "loss": 0.0203, "step": 6075 }, { "epoch": 4.209213716660893, "grad_norm": 0.34081822633743286, "learning_rate": 5.793342579750347e-06, "loss": 0.0129, "step": 6076 }, { "epoch": 4.209906477312089, "grad_norm": 0.406585156917572, "learning_rate": 5.792649098474342e-06, "loss": 0.0144, "step": 6077 }, { "epoch": 4.210599237963284, "grad_norm": 0.405737042427063, "learning_rate": 5.791955617198336e-06, "loss": 0.0122, "step": 6078 }, { "epoch": 4.211291998614478, "grad_norm": 0.3602556884288788, "learning_rate": 5.791262135922331e-06, "loss": 0.0158, "step": 6079 }, { "epoch": 4.211984759265674, "grad_norm": 0.4407044053077698, "learning_rate": 5.790568654646324e-06, "loss": 0.0124, "step": 6080 }, { "epoch": 4.212677519916869, "grad_norm": 0.5421193242073059, "learning_rate": 5.789875173370319e-06, "loss": 0.0165, "step": 6081 }, { "epoch": 4.213370280568064, "grad_norm": 0.3487362563610077, "learning_rate": 5.789181692094314e-06, "loss": 0.0113, "step": 6082 }, { "epoch": 4.214063041219259, "grad_norm": 0.45292338728904724, "learning_rate": 5.788488210818308e-06, "loss": 0.0159, "step": 6083 }, { "epoch": 4.214755801870454, "grad_norm": 0.4305095076560974, "learning_rate": 5.787794729542303e-06, "loss": 0.0181, "step": 6084 }, { "epoch": 4.215448562521649, "grad_norm": 0.413237065076828, "learning_rate": 5.787101248266297e-06, "loss": 0.015, "step": 6085 }, { "epoch": 4.2161413231728435, "grad_norm": 0.4743373394012451, "learning_rate": 5.786407766990291e-06, "loss": 0.0168, "step": 6086 }, { "epoch": 4.216834083824039, "grad_norm": 0.42930710315704346, "learning_rate": 5.785714285714286e-06, "loss": 0.0135, "step": 6087 }, { "epoch": 4.217526844475234, "grad_norm": 0.7027872800827026, "learning_rate": 5.7850208044382804e-06, "loss": 0.0151, "step": 6088 }, { "epoch": 4.2182196051264285, "grad_norm": 0.4259084463119507, "learning_rate": 5.784327323162275e-06, "loss": 0.0132, "step": 6089 }, { "epoch": 4.218912365777624, "grad_norm": 0.488026887178421, "learning_rate": 5.7836338418862695e-06, "loss": 0.0135, "step": 6090 }, { "epoch": 4.219605126428819, "grad_norm": 0.4157416522502899, "learning_rate": 5.782940360610264e-06, "loss": 0.0134, "step": 6091 }, { "epoch": 4.220297887080013, "grad_norm": 0.4387360215187073, "learning_rate": 5.782246879334259e-06, "loss": 0.0129, "step": 6092 }, { "epoch": 4.220990647731209, "grad_norm": 0.41994211077690125, "learning_rate": 5.7815533980582526e-06, "loss": 0.0123, "step": 6093 }, { "epoch": 4.221683408382404, "grad_norm": 0.36253395676612854, "learning_rate": 5.7808599167822475e-06, "loss": 0.0119, "step": 6094 }, { "epoch": 4.222376169033599, "grad_norm": 0.3640895485877991, "learning_rate": 5.780166435506242e-06, "loss": 0.014, "step": 6095 }, { "epoch": 4.223068929684794, "grad_norm": 0.41119879484176636, "learning_rate": 5.7794729542302365e-06, "loss": 0.0126, "step": 6096 }, { "epoch": 4.223761690335989, "grad_norm": 0.4430147707462311, "learning_rate": 5.7787794729542314e-06, "loss": 0.0136, "step": 6097 }, { "epoch": 4.224454450987184, "grad_norm": 0.5249005556106567, "learning_rate": 5.778085991678225e-06, "loss": 0.0176, "step": 6098 }, { "epoch": 4.225147211638379, "grad_norm": 0.3413819968700409, "learning_rate": 5.77739251040222e-06, "loss": 0.0113, "step": 6099 }, { "epoch": 4.225839972289574, "grad_norm": 0.3949770927429199, "learning_rate": 5.776699029126214e-06, "loss": 0.0113, "step": 6100 }, { "epoch": 4.226532732940769, "grad_norm": 0.43476852774620056, "learning_rate": 5.776005547850209e-06, "loss": 0.013, "step": 6101 }, { "epoch": 4.227225493591964, "grad_norm": 0.6038212180137634, "learning_rate": 5.7753120665742036e-06, "loss": 0.0147, "step": 6102 }, { "epoch": 4.227918254243159, "grad_norm": 0.44593775272369385, "learning_rate": 5.774618585298198e-06, "loss": 0.0122, "step": 6103 }, { "epoch": 4.228611014894354, "grad_norm": 0.43964704871177673, "learning_rate": 5.773925104022193e-06, "loss": 0.0141, "step": 6104 }, { "epoch": 4.229303775545549, "grad_norm": 0.45246773958206177, "learning_rate": 5.773231622746186e-06, "loss": 0.0179, "step": 6105 }, { "epoch": 4.229996536196744, "grad_norm": 0.4594613313674927, "learning_rate": 5.772538141470181e-06, "loss": 0.0156, "step": 6106 }, { "epoch": 4.230689296847939, "grad_norm": 0.44228464365005493, "learning_rate": 5.771844660194176e-06, "loss": 0.0139, "step": 6107 }, { "epoch": 4.231382057499134, "grad_norm": 0.5431798100471497, "learning_rate": 5.77115117891817e-06, "loss": 0.0179, "step": 6108 }, { "epoch": 4.232074818150329, "grad_norm": 0.4152578115463257, "learning_rate": 5.770457697642165e-06, "loss": 0.0171, "step": 6109 }, { "epoch": 4.232767578801524, "grad_norm": 0.4811961352825165, "learning_rate": 5.769764216366158e-06, "loss": 0.0164, "step": 6110 }, { "epoch": 4.233460339452719, "grad_norm": 0.45115652680397034, "learning_rate": 5.769070735090153e-06, "loss": 0.0198, "step": 6111 }, { "epoch": 4.234153100103914, "grad_norm": 0.3338993787765503, "learning_rate": 5.768377253814148e-06, "loss": 0.0111, "step": 6112 }, { "epoch": 4.234845860755109, "grad_norm": 0.5009710788726807, "learning_rate": 5.767683772538142e-06, "loss": 0.0159, "step": 6113 }, { "epoch": 4.235538621406304, "grad_norm": 0.384583055973053, "learning_rate": 5.766990291262137e-06, "loss": 0.0128, "step": 6114 }, { "epoch": 4.2362313820574995, "grad_norm": 0.6601429581642151, "learning_rate": 5.766296809986131e-06, "loss": 0.0168, "step": 6115 }, { "epoch": 4.236924142708694, "grad_norm": 0.37512263655662537, "learning_rate": 5.765603328710125e-06, "loss": 0.0111, "step": 6116 }, { "epoch": 4.237616903359889, "grad_norm": 0.39865222573280334, "learning_rate": 5.76490984743412e-06, "loss": 0.0141, "step": 6117 }, { "epoch": 4.238309664011084, "grad_norm": 0.4099164307117462, "learning_rate": 5.764216366158114e-06, "loss": 0.0149, "step": 6118 }, { "epoch": 4.239002424662279, "grad_norm": 0.4311049282550812, "learning_rate": 5.763522884882109e-06, "loss": 0.0171, "step": 6119 }, { "epoch": 4.239695185313474, "grad_norm": 0.6434735655784607, "learning_rate": 5.762829403606103e-06, "loss": 0.0176, "step": 6120 }, { "epoch": 4.240387945964669, "grad_norm": 0.4360758364200592, "learning_rate": 5.762135922330098e-06, "loss": 0.0189, "step": 6121 }, { "epoch": 4.241080706615865, "grad_norm": 0.5391027331352234, "learning_rate": 5.761442441054093e-06, "loss": 0.016, "step": 6122 }, { "epoch": 4.241773467267059, "grad_norm": 0.47517144680023193, "learning_rate": 5.760748959778086e-06, "loss": 0.0107, "step": 6123 }, { "epoch": 4.242466227918254, "grad_norm": 0.4058322310447693, "learning_rate": 5.760055478502081e-06, "loss": 0.0121, "step": 6124 }, { "epoch": 4.24315898856945, "grad_norm": 0.48418089747428894, "learning_rate": 5.759361997226075e-06, "loss": 0.0162, "step": 6125 }, { "epoch": 4.243851749220644, "grad_norm": 0.3738580644130707, "learning_rate": 5.75866851595007e-06, "loss": 0.0151, "step": 6126 }, { "epoch": 4.244544509871839, "grad_norm": 0.4556715488433838, "learning_rate": 5.757975034674065e-06, "loss": 0.0166, "step": 6127 }, { "epoch": 4.245237270523035, "grad_norm": 0.36216968297958374, "learning_rate": 5.757281553398058e-06, "loss": 0.012, "step": 6128 }, { "epoch": 4.245930031174229, "grad_norm": 0.6662531495094299, "learning_rate": 5.756588072122053e-06, "loss": 0.016, "step": 6129 }, { "epoch": 4.246622791825424, "grad_norm": 0.477200984954834, "learning_rate": 5.755894590846047e-06, "loss": 0.0195, "step": 6130 }, { "epoch": 4.2473155524766195, "grad_norm": 0.45956000685691833, "learning_rate": 5.755201109570042e-06, "loss": 0.0129, "step": 6131 }, { "epoch": 4.248008313127814, "grad_norm": 0.38181018829345703, "learning_rate": 5.754507628294037e-06, "loss": 0.012, "step": 6132 }, { "epoch": 4.248701073779009, "grad_norm": 0.48798617720603943, "learning_rate": 5.753814147018031e-06, "loss": 0.0152, "step": 6133 }, { "epoch": 4.2493938344302045, "grad_norm": 0.48894646763801575, "learning_rate": 5.753120665742025e-06, "loss": 0.0173, "step": 6134 }, { "epoch": 4.2500865950814, "grad_norm": 0.5880109071731567, "learning_rate": 5.7524271844660194e-06, "loss": 0.0203, "step": 6135 }, { "epoch": 4.250779355732594, "grad_norm": 0.5441635847091675, "learning_rate": 5.751733703190014e-06, "loss": 0.0122, "step": 6136 }, { "epoch": 4.251472116383789, "grad_norm": 0.5183771848678589, "learning_rate": 5.751040221914009e-06, "loss": 0.0157, "step": 6137 }, { "epoch": 4.252164877034985, "grad_norm": 0.39211976528167725, "learning_rate": 5.750346740638003e-06, "loss": 0.016, "step": 6138 }, { "epoch": 4.252857637686179, "grad_norm": 0.5179469585418701, "learning_rate": 5.749653259361998e-06, "loss": 0.0155, "step": 6139 }, { "epoch": 4.253550398337374, "grad_norm": 0.6000032424926758, "learning_rate": 5.7489597780859916e-06, "loss": 0.0139, "step": 6140 }, { "epoch": 4.25424315898857, "grad_norm": 0.6474589109420776, "learning_rate": 5.7482662968099865e-06, "loss": 0.0184, "step": 6141 }, { "epoch": 4.254935919639765, "grad_norm": 0.37843167781829834, "learning_rate": 5.747572815533981e-06, "loss": 0.014, "step": 6142 }, { "epoch": 4.255628680290959, "grad_norm": 0.449325293302536, "learning_rate": 5.7468793342579755e-06, "loss": 0.0115, "step": 6143 }, { "epoch": 4.256321440942155, "grad_norm": 0.4086148738861084, "learning_rate": 5.7461858529819704e-06, "loss": 0.0132, "step": 6144 }, { "epoch": 4.25701420159335, "grad_norm": 0.7150774002075195, "learning_rate": 5.745492371705964e-06, "loss": 0.0158, "step": 6145 }, { "epoch": 4.257706962244544, "grad_norm": 0.4631674587726593, "learning_rate": 5.744798890429959e-06, "loss": 0.0126, "step": 6146 }, { "epoch": 4.2583997228957395, "grad_norm": 0.3581140637397766, "learning_rate": 5.744105409153953e-06, "loss": 0.0133, "step": 6147 }, { "epoch": 4.259092483546935, "grad_norm": 0.3423003554344177, "learning_rate": 5.743411927877948e-06, "loss": 0.0113, "step": 6148 }, { "epoch": 4.259785244198129, "grad_norm": 0.5469533205032349, "learning_rate": 5.7427184466019426e-06, "loss": 0.0158, "step": 6149 }, { "epoch": 4.2604780048493245, "grad_norm": 0.5302712321281433, "learning_rate": 5.742024965325937e-06, "loss": 0.0162, "step": 6150 }, { "epoch": 4.26117076550052, "grad_norm": 0.5141806602478027, "learning_rate": 5.741331484049932e-06, "loss": 0.0149, "step": 6151 }, { "epoch": 4.261863526151714, "grad_norm": 0.48647239804267883, "learning_rate": 5.740638002773925e-06, "loss": 0.0126, "step": 6152 }, { "epoch": 4.262556286802909, "grad_norm": 0.464964896440506, "learning_rate": 5.73994452149792e-06, "loss": 0.0117, "step": 6153 }, { "epoch": 4.263249047454105, "grad_norm": 0.48162877559661865, "learning_rate": 5.739251040221915e-06, "loss": 0.0174, "step": 6154 }, { "epoch": 4.2639418081053, "grad_norm": 0.4663795232772827, "learning_rate": 5.738557558945909e-06, "loss": 0.0149, "step": 6155 }, { "epoch": 4.264634568756494, "grad_norm": 0.3906458616256714, "learning_rate": 5.737864077669904e-06, "loss": 0.0196, "step": 6156 }, { "epoch": 4.26532732940769, "grad_norm": 0.36130955815315247, "learning_rate": 5.737170596393897e-06, "loss": 0.011, "step": 6157 }, { "epoch": 4.266020090058885, "grad_norm": 0.4151374399662018, "learning_rate": 5.736477115117892e-06, "loss": 0.0154, "step": 6158 }, { "epoch": 4.266712850710079, "grad_norm": 0.4259635806083679, "learning_rate": 5.735783633841887e-06, "loss": 0.0167, "step": 6159 }, { "epoch": 4.267405611361275, "grad_norm": 0.4480990767478943, "learning_rate": 5.735090152565881e-06, "loss": 0.0153, "step": 6160 }, { "epoch": 4.26809837201247, "grad_norm": 0.4132225811481476, "learning_rate": 5.734396671289876e-06, "loss": 0.0128, "step": 6161 }, { "epoch": 4.268791132663665, "grad_norm": 0.5066116452217102, "learning_rate": 5.73370319001387e-06, "loss": 0.0205, "step": 6162 }, { "epoch": 4.26948389331486, "grad_norm": 0.4326753318309784, "learning_rate": 5.733009708737865e-06, "loss": 0.0105, "step": 6163 }, { "epoch": 4.270176653966055, "grad_norm": 0.37269070744514465, "learning_rate": 5.732316227461859e-06, "loss": 0.0114, "step": 6164 }, { "epoch": 4.27086941461725, "grad_norm": 0.7043964862823486, "learning_rate": 5.731622746185853e-06, "loss": 0.0167, "step": 6165 }, { "epoch": 4.2715621752684445, "grad_norm": 0.37173211574554443, "learning_rate": 5.730929264909848e-06, "loss": 0.012, "step": 6166 }, { "epoch": 4.27225493591964, "grad_norm": 0.5097013711929321, "learning_rate": 5.730235783633842e-06, "loss": 0.0162, "step": 6167 }, { "epoch": 4.272947696570835, "grad_norm": 0.448404997587204, "learning_rate": 5.729542302357837e-06, "loss": 0.0149, "step": 6168 }, { "epoch": 4.2736404572220295, "grad_norm": 0.4259950518608093, "learning_rate": 5.728848821081832e-06, "loss": 0.0118, "step": 6169 }, { "epoch": 4.274333217873225, "grad_norm": 0.5313334465026855, "learning_rate": 5.728155339805825e-06, "loss": 0.0178, "step": 6170 }, { "epoch": 4.27502597852442, "grad_norm": 0.4327642321586609, "learning_rate": 5.72746185852982e-06, "loss": 0.0152, "step": 6171 }, { "epoch": 4.275718739175614, "grad_norm": 0.30305030941963196, "learning_rate": 5.726768377253814e-06, "loss": 0.0119, "step": 6172 }, { "epoch": 4.27641149982681, "grad_norm": 0.42507559061050415, "learning_rate": 5.726074895977809e-06, "loss": 0.0166, "step": 6173 }, { "epoch": 4.277104260478005, "grad_norm": 0.5407571792602539, "learning_rate": 5.725381414701804e-06, "loss": 0.0201, "step": 6174 }, { "epoch": 4.2777970211292, "grad_norm": 0.43881678581237793, "learning_rate": 5.724687933425797e-06, "loss": 0.0149, "step": 6175 }, { "epoch": 4.278489781780395, "grad_norm": 0.47735050320625305, "learning_rate": 5.723994452149792e-06, "loss": 0.0165, "step": 6176 }, { "epoch": 4.27918254243159, "grad_norm": 0.35856977105140686, "learning_rate": 5.723300970873786e-06, "loss": 0.0163, "step": 6177 }, { "epoch": 4.279875303082785, "grad_norm": 0.6316174268722534, "learning_rate": 5.722607489597781e-06, "loss": 0.0116, "step": 6178 }, { "epoch": 4.28056806373398, "grad_norm": 0.4738340377807617, "learning_rate": 5.721914008321776e-06, "loss": 0.0174, "step": 6179 }, { "epoch": 4.281260824385175, "grad_norm": 0.4427250027656555, "learning_rate": 5.72122052704577e-06, "loss": 0.0155, "step": 6180 }, { "epoch": 4.28195358503637, "grad_norm": 0.4292624294757843, "learning_rate": 5.720527045769765e-06, "loss": 0.0105, "step": 6181 }, { "epoch": 4.282646345687565, "grad_norm": 0.35663384199142456, "learning_rate": 5.7198335644937584e-06, "loss": 0.011, "step": 6182 }, { "epoch": 4.28333910633876, "grad_norm": 0.4772343933582306, "learning_rate": 5.719140083217753e-06, "loss": 0.0195, "step": 6183 }, { "epoch": 4.284031866989955, "grad_norm": 0.48446860909461975, "learning_rate": 5.718446601941748e-06, "loss": 0.0159, "step": 6184 }, { "epoch": 4.28472462764115, "grad_norm": 0.5933783650398254, "learning_rate": 5.717753120665742e-06, "loss": 0.0184, "step": 6185 }, { "epoch": 4.285417388292345, "grad_norm": 0.5756788849830627, "learning_rate": 5.717059639389737e-06, "loss": 0.012, "step": 6186 }, { "epoch": 4.28611014894354, "grad_norm": 0.5146098136901855, "learning_rate": 5.7163661581137306e-06, "loss": 0.0179, "step": 6187 }, { "epoch": 4.286802909594735, "grad_norm": 0.47209280729293823, "learning_rate": 5.7156726768377255e-06, "loss": 0.0143, "step": 6188 }, { "epoch": 4.28749567024593, "grad_norm": 0.37178516387939453, "learning_rate": 5.7149791955617204e-06, "loss": 0.0123, "step": 6189 }, { "epoch": 4.288188430897125, "grad_norm": 0.4624086320400238, "learning_rate": 5.7142857142857145e-06, "loss": 0.0144, "step": 6190 }, { "epoch": 4.28888119154832, "grad_norm": 0.5005056262016296, "learning_rate": 5.7135922330097094e-06, "loss": 0.0213, "step": 6191 }, { "epoch": 4.289573952199515, "grad_norm": 0.5770010948181152, "learning_rate": 5.7128987517337035e-06, "loss": 0.0162, "step": 6192 }, { "epoch": 4.29026671285071, "grad_norm": 0.3798089623451233, "learning_rate": 5.712205270457698e-06, "loss": 0.0135, "step": 6193 }, { "epoch": 4.290959473501905, "grad_norm": 0.5228467583656311, "learning_rate": 5.7115117891816926e-06, "loss": 0.0164, "step": 6194 }, { "epoch": 4.2916522341531005, "grad_norm": 0.4126785695552826, "learning_rate": 5.710818307905687e-06, "loss": 0.0165, "step": 6195 }, { "epoch": 4.292344994804295, "grad_norm": 0.39764726161956787, "learning_rate": 5.7101248266296816e-06, "loss": 0.0157, "step": 6196 }, { "epoch": 4.29303775545549, "grad_norm": 0.386638879776001, "learning_rate": 5.709431345353676e-06, "loss": 0.0128, "step": 6197 }, { "epoch": 4.293730516106685, "grad_norm": 0.5254207849502563, "learning_rate": 5.708737864077671e-06, "loss": 0.028, "step": 6198 }, { "epoch": 4.29442327675788, "grad_norm": 0.46779918670654297, "learning_rate": 5.7080443828016655e-06, "loss": 0.014, "step": 6199 }, { "epoch": 4.295116037409075, "grad_norm": 0.31714537739753723, "learning_rate": 5.707350901525659e-06, "loss": 0.011, "step": 6200 }, { "epoch": 4.29580879806027, "grad_norm": 0.37947049736976624, "learning_rate": 5.706657420249654e-06, "loss": 0.0156, "step": 6201 }, { "epoch": 4.296501558711465, "grad_norm": 0.4820058047771454, "learning_rate": 5.705963938973648e-06, "loss": 0.0151, "step": 6202 }, { "epoch": 4.29719431936266, "grad_norm": 0.4857361912727356, "learning_rate": 5.705270457697643e-06, "loss": 0.0094, "step": 6203 }, { "epoch": 4.297887080013855, "grad_norm": 0.3891202509403229, "learning_rate": 5.704576976421638e-06, "loss": 0.0103, "step": 6204 }, { "epoch": 4.298579840665051, "grad_norm": 0.5287541151046753, "learning_rate": 5.703883495145631e-06, "loss": 0.0161, "step": 6205 }, { "epoch": 4.299272601316245, "grad_norm": 0.4331056475639343, "learning_rate": 5.703190013869626e-06, "loss": 0.0115, "step": 6206 }, { "epoch": 4.29996536196744, "grad_norm": 0.4861893951892853, "learning_rate": 5.70249653259362e-06, "loss": 0.0154, "step": 6207 }, { "epoch": 4.300658122618636, "grad_norm": 0.43413400650024414, "learning_rate": 5.701803051317615e-06, "loss": 0.0124, "step": 6208 }, { "epoch": 4.30135088326983, "grad_norm": 0.41063931584358215, "learning_rate": 5.70110957004161e-06, "loss": 0.0135, "step": 6209 }, { "epoch": 4.302043643921025, "grad_norm": 0.5673719048500061, "learning_rate": 5.700416088765604e-06, "loss": 0.016, "step": 6210 }, { "epoch": 4.3027364045722205, "grad_norm": 0.4758017361164093, "learning_rate": 5.699722607489599e-06, "loss": 0.0151, "step": 6211 }, { "epoch": 4.303429165223415, "grad_norm": 0.4562930166721344, "learning_rate": 5.699029126213592e-06, "loss": 0.0202, "step": 6212 }, { "epoch": 4.30412192587461, "grad_norm": 0.44941607117652893, "learning_rate": 5.698335644937587e-06, "loss": 0.0226, "step": 6213 }, { "epoch": 4.3048146865258055, "grad_norm": 0.6464312076568604, "learning_rate": 5.697642163661582e-06, "loss": 0.0166, "step": 6214 }, { "epoch": 4.305507447177001, "grad_norm": 0.4331834316253662, "learning_rate": 5.696948682385576e-06, "loss": 0.0149, "step": 6215 }, { "epoch": 4.306200207828195, "grad_norm": 0.5660507082939148, "learning_rate": 5.696255201109571e-06, "loss": 0.0243, "step": 6216 }, { "epoch": 4.30689296847939, "grad_norm": 0.513453483581543, "learning_rate": 5.695561719833564e-06, "loss": 0.0168, "step": 6217 }, { "epoch": 4.307585729130586, "grad_norm": 0.39191699028015137, "learning_rate": 5.694868238557559e-06, "loss": 0.0151, "step": 6218 }, { "epoch": 4.30827848978178, "grad_norm": 0.46802085638046265, "learning_rate": 5.694174757281554e-06, "loss": 0.0139, "step": 6219 }, { "epoch": 4.308971250432975, "grad_norm": 0.5223895907402039, "learning_rate": 5.693481276005548e-06, "loss": 0.0151, "step": 6220 }, { "epoch": 4.309664011084171, "grad_norm": 0.4044167101383209, "learning_rate": 5.692787794729543e-06, "loss": 0.0137, "step": 6221 }, { "epoch": 4.310356771735365, "grad_norm": 0.3542228639125824, "learning_rate": 5.692094313453536e-06, "loss": 0.0106, "step": 6222 }, { "epoch": 4.31104953238656, "grad_norm": 0.44608378410339355, "learning_rate": 5.691400832177531e-06, "loss": 0.0156, "step": 6223 }, { "epoch": 4.311742293037756, "grad_norm": 0.46396249532699585, "learning_rate": 5.690707350901526e-06, "loss": 0.0203, "step": 6224 }, { "epoch": 4.312435053688951, "grad_norm": 0.4457850158214569, "learning_rate": 5.69001386962552e-06, "loss": 0.0132, "step": 6225 }, { "epoch": 4.313127814340145, "grad_norm": 0.4410547912120819, "learning_rate": 5.689320388349515e-06, "loss": 0.015, "step": 6226 }, { "epoch": 4.3138205749913405, "grad_norm": 0.47477442026138306, "learning_rate": 5.688626907073509e-06, "loss": 0.0163, "step": 6227 }, { "epoch": 4.314513335642536, "grad_norm": 0.4961939752101898, "learning_rate": 5.687933425797504e-06, "loss": 0.0179, "step": 6228 }, { "epoch": 4.31520609629373, "grad_norm": 0.33782264590263367, "learning_rate": 5.687239944521499e-06, "loss": 0.0092, "step": 6229 }, { "epoch": 4.3158988569449255, "grad_norm": 0.5120416879653931, "learning_rate": 5.686546463245492e-06, "loss": 0.0124, "step": 6230 }, { "epoch": 4.316591617596121, "grad_norm": 0.43162956833839417, "learning_rate": 5.685852981969487e-06, "loss": 0.0131, "step": 6231 }, { "epoch": 4.317284378247315, "grad_norm": 0.4046199917793274, "learning_rate": 5.685159500693481e-06, "loss": 0.0128, "step": 6232 }, { "epoch": 4.31797713889851, "grad_norm": 0.4141041934490204, "learning_rate": 5.684466019417476e-06, "loss": 0.0125, "step": 6233 }, { "epoch": 4.318669899549706, "grad_norm": 0.5312274694442749, "learning_rate": 5.683772538141471e-06, "loss": 0.0155, "step": 6234 }, { "epoch": 4.319362660200901, "grad_norm": 0.6274176239967346, "learning_rate": 5.6830790568654645e-06, "loss": 0.0132, "step": 6235 }, { "epoch": 4.320055420852095, "grad_norm": 0.41988468170166016, "learning_rate": 5.6823855755894594e-06, "loss": 0.0123, "step": 6236 }, { "epoch": 4.320748181503291, "grad_norm": 0.5616729259490967, "learning_rate": 5.6816920943134535e-06, "loss": 0.0172, "step": 6237 }, { "epoch": 4.321440942154486, "grad_norm": 0.46666640043258667, "learning_rate": 5.6809986130374484e-06, "loss": 0.0168, "step": 6238 }, { "epoch": 4.32213370280568, "grad_norm": 0.5138194561004639, "learning_rate": 5.680305131761443e-06, "loss": 0.015, "step": 6239 }, { "epoch": 4.322826463456876, "grad_norm": 0.4188655912876129, "learning_rate": 5.6796116504854375e-06, "loss": 0.0161, "step": 6240 }, { "epoch": 4.323519224108071, "grad_norm": 0.44582825899124146, "learning_rate": 5.6789181692094316e-06, "loss": 0.0128, "step": 6241 }, { "epoch": 4.324211984759265, "grad_norm": 0.6632908582687378, "learning_rate": 5.678224687933426e-06, "loss": 0.0153, "step": 6242 }, { "epoch": 4.3249047454104605, "grad_norm": 0.49136364459991455, "learning_rate": 5.6775312066574206e-06, "loss": 0.0132, "step": 6243 }, { "epoch": 4.325597506061656, "grad_norm": 0.46757856011390686, "learning_rate": 5.6768377253814155e-06, "loss": 0.0174, "step": 6244 }, { "epoch": 4.326290266712851, "grad_norm": 0.5878485441207886, "learning_rate": 5.67614424410541e-06, "loss": 0.0121, "step": 6245 }, { "epoch": 4.3269830273640455, "grad_norm": 0.37498223781585693, "learning_rate": 5.6754507628294045e-06, "loss": 0.0152, "step": 6246 }, { "epoch": 4.327675788015241, "grad_norm": 0.33918726444244385, "learning_rate": 5.674757281553398e-06, "loss": 0.0113, "step": 6247 }, { "epoch": 4.328368548666436, "grad_norm": 0.3665863275527954, "learning_rate": 5.674063800277393e-06, "loss": 0.0112, "step": 6248 }, { "epoch": 4.3290613093176304, "grad_norm": 0.5431647300720215, "learning_rate": 5.673370319001388e-06, "loss": 0.0196, "step": 6249 }, { "epoch": 4.329754069968826, "grad_norm": 0.36368706822395325, "learning_rate": 5.672676837725382e-06, "loss": 0.0126, "step": 6250 }, { "epoch": 4.330446830620021, "grad_norm": 0.47009578347206116, "learning_rate": 5.671983356449377e-06, "loss": 0.0113, "step": 6251 }, { "epoch": 4.331139591271215, "grad_norm": 0.45506036281585693, "learning_rate": 5.67128987517337e-06, "loss": 0.0152, "step": 6252 }, { "epoch": 4.331832351922411, "grad_norm": 0.5325137972831726, "learning_rate": 5.670596393897365e-06, "loss": 0.019, "step": 6253 }, { "epoch": 4.332525112573606, "grad_norm": 0.43603190779685974, "learning_rate": 5.66990291262136e-06, "loss": 0.0152, "step": 6254 }, { "epoch": 4.333217873224801, "grad_norm": 0.6645075082778931, "learning_rate": 5.669209431345354e-06, "loss": 0.0222, "step": 6255 }, { "epoch": 4.333910633875996, "grad_norm": 0.43905749917030334, "learning_rate": 5.668515950069349e-06, "loss": 0.0135, "step": 6256 }, { "epoch": 4.334603394527191, "grad_norm": 0.4511397182941437, "learning_rate": 5.667822468793343e-06, "loss": 0.0148, "step": 6257 }, { "epoch": 4.335296155178386, "grad_norm": 0.3553571403026581, "learning_rate": 5.667128987517338e-06, "loss": 0.0101, "step": 6258 }, { "epoch": 4.335988915829581, "grad_norm": 0.5838598608970642, "learning_rate": 5.666435506241333e-06, "loss": 0.0134, "step": 6259 }, { "epoch": 4.336681676480776, "grad_norm": 0.5036539435386658, "learning_rate": 5.665742024965326e-06, "loss": 0.0107, "step": 6260 }, { "epoch": 4.337374437131971, "grad_norm": 0.4577176868915558, "learning_rate": 5.665048543689321e-06, "loss": 0.0141, "step": 6261 }, { "epoch": 4.3380671977831655, "grad_norm": 0.35945776104927063, "learning_rate": 5.664355062413315e-06, "loss": 0.0125, "step": 6262 }, { "epoch": 4.338759958434361, "grad_norm": 0.41411110758781433, "learning_rate": 5.66366158113731e-06, "loss": 0.0154, "step": 6263 }, { "epoch": 4.339452719085556, "grad_norm": 0.5097288489341736, "learning_rate": 5.662968099861305e-06, "loss": 0.0145, "step": 6264 }, { "epoch": 4.340145479736751, "grad_norm": 0.5383662581443787, "learning_rate": 5.662274618585298e-06, "loss": 0.013, "step": 6265 }, { "epoch": 4.340838240387946, "grad_norm": 0.44316715002059937, "learning_rate": 5.661581137309293e-06, "loss": 0.0124, "step": 6266 }, { "epoch": 4.341531001039141, "grad_norm": 0.5001533627510071, "learning_rate": 5.660887656033287e-06, "loss": 0.0168, "step": 6267 }, { "epoch": 4.342223761690336, "grad_norm": 0.31020647287368774, "learning_rate": 5.660194174757282e-06, "loss": 0.0098, "step": 6268 }, { "epoch": 4.342916522341531, "grad_norm": 0.40882495045661926, "learning_rate": 5.659500693481277e-06, "loss": 0.0117, "step": 6269 }, { "epoch": 4.343609282992726, "grad_norm": 0.4471372663974762, "learning_rate": 5.65880721220527e-06, "loss": 0.013, "step": 6270 }, { "epoch": 4.344302043643921, "grad_norm": 0.4961498975753784, "learning_rate": 5.658113730929265e-06, "loss": 0.0143, "step": 6271 }, { "epoch": 4.344994804295116, "grad_norm": 0.5297603011131287, "learning_rate": 5.657420249653259e-06, "loss": 0.0173, "step": 6272 }, { "epoch": 4.345687564946311, "grad_norm": 0.5457528829574585, "learning_rate": 5.656726768377254e-06, "loss": 0.015, "step": 6273 }, { "epoch": 4.346380325597506, "grad_norm": 0.922075092792511, "learning_rate": 5.656033287101249e-06, "loss": 0.0175, "step": 6274 }, { "epoch": 4.3470730862487015, "grad_norm": 0.4334849715232849, "learning_rate": 5.655339805825243e-06, "loss": 0.0196, "step": 6275 }, { "epoch": 4.347765846899896, "grad_norm": 0.5204346776008606, "learning_rate": 5.654646324549238e-06, "loss": 0.0174, "step": 6276 }, { "epoch": 4.348458607551091, "grad_norm": 0.4305042624473572, "learning_rate": 5.653952843273231e-06, "loss": 0.0137, "step": 6277 }, { "epoch": 4.349151368202286, "grad_norm": 0.4151136875152588, "learning_rate": 5.653259361997226e-06, "loss": 0.0134, "step": 6278 }, { "epoch": 4.349844128853481, "grad_norm": 0.7482634782791138, "learning_rate": 5.652565880721221e-06, "loss": 0.0175, "step": 6279 }, { "epoch": 4.350536889504676, "grad_norm": 0.5009634494781494, "learning_rate": 5.651872399445215e-06, "loss": 0.0155, "step": 6280 }, { "epoch": 4.351229650155871, "grad_norm": 0.3669988811016083, "learning_rate": 5.65117891816921e-06, "loss": 0.0131, "step": 6281 }, { "epoch": 4.351922410807066, "grad_norm": 0.4883120059967041, "learning_rate": 5.6504854368932035e-06, "loss": 0.0086, "step": 6282 }, { "epoch": 4.352615171458261, "grad_norm": 0.6424862742424011, "learning_rate": 5.6497919556171984e-06, "loss": 0.0165, "step": 6283 }, { "epoch": 4.353307932109456, "grad_norm": 0.42300087213516235, "learning_rate": 5.649098474341193e-06, "loss": 0.0144, "step": 6284 }, { "epoch": 4.354000692760652, "grad_norm": 0.33685287833213806, "learning_rate": 5.6484049930651874e-06, "loss": 0.0121, "step": 6285 }, { "epoch": 4.354693453411846, "grad_norm": 0.38402703404426575, "learning_rate": 5.647711511789182e-06, "loss": 0.0146, "step": 6286 }, { "epoch": 4.355386214063041, "grad_norm": 0.4445732533931732, "learning_rate": 5.6470180305131765e-06, "loss": 0.0178, "step": 6287 }, { "epoch": 4.3560789747142366, "grad_norm": 0.5329450964927673, "learning_rate": 5.646324549237171e-06, "loss": 0.0165, "step": 6288 }, { "epoch": 4.356771735365431, "grad_norm": 0.44079670310020447, "learning_rate": 5.6456310679611655e-06, "loss": 0.0141, "step": 6289 }, { "epoch": 4.357464496016626, "grad_norm": 0.5818588137626648, "learning_rate": 5.6449375866851596e-06, "loss": 0.0192, "step": 6290 }, { "epoch": 4.3581572566678215, "grad_norm": 0.4807320833206177, "learning_rate": 5.6442441054091545e-06, "loss": 0.0147, "step": 6291 }, { "epoch": 4.358850017319016, "grad_norm": 0.45464131236076355, "learning_rate": 5.643550624133149e-06, "loss": 0.0124, "step": 6292 }, { "epoch": 4.359542777970211, "grad_norm": 0.4575372040271759, "learning_rate": 5.6428571428571435e-06, "loss": 0.0171, "step": 6293 }, { "epoch": 4.3602355386214064, "grad_norm": 0.5620321035385132, "learning_rate": 5.6421636615811385e-06, "loss": 0.0159, "step": 6294 }, { "epoch": 4.360928299272602, "grad_norm": 0.407573401927948, "learning_rate": 5.641470180305132e-06, "loss": 0.0148, "step": 6295 }, { "epoch": 4.361621059923796, "grad_norm": 0.48125845193862915, "learning_rate": 5.640776699029127e-06, "loss": 0.0174, "step": 6296 }, { "epoch": 4.362313820574991, "grad_norm": 0.5339037179946899, "learning_rate": 5.640083217753121e-06, "loss": 0.016, "step": 6297 }, { "epoch": 4.363006581226187, "grad_norm": 0.5509049296379089, "learning_rate": 5.639389736477116e-06, "loss": 0.0168, "step": 6298 }, { "epoch": 4.363699341877381, "grad_norm": 0.47088614106178284, "learning_rate": 5.638696255201111e-06, "loss": 0.0186, "step": 6299 }, { "epoch": 4.364392102528576, "grad_norm": 0.6244907975196838, "learning_rate": 5.638002773925104e-06, "loss": 0.0148, "step": 6300 }, { "epoch": 4.365084863179772, "grad_norm": 0.4153135120868683, "learning_rate": 5.637309292649099e-06, "loss": 0.0131, "step": 6301 }, { "epoch": 4.365777623830966, "grad_norm": 0.452224463224411, "learning_rate": 5.636615811373093e-06, "loss": 0.014, "step": 6302 }, { "epoch": 4.366470384482161, "grad_norm": 0.4876563549041748, "learning_rate": 5.635922330097088e-06, "loss": 0.0155, "step": 6303 }, { "epoch": 4.367163145133357, "grad_norm": 0.36006730794906616, "learning_rate": 5.635228848821083e-06, "loss": 0.0111, "step": 6304 }, { "epoch": 4.367855905784552, "grad_norm": 0.545689046382904, "learning_rate": 5.634535367545077e-06, "loss": 0.0178, "step": 6305 }, { "epoch": 4.368548666435746, "grad_norm": 0.44589248299598694, "learning_rate": 5.633841886269072e-06, "loss": 0.0168, "step": 6306 }, { "epoch": 4.3692414270869415, "grad_norm": 0.3454737365245819, "learning_rate": 5.633148404993065e-06, "loss": 0.0103, "step": 6307 }, { "epoch": 4.369934187738137, "grad_norm": 0.40857359766960144, "learning_rate": 5.63245492371706e-06, "loss": 0.0145, "step": 6308 }, { "epoch": 4.370626948389331, "grad_norm": 0.42181888222694397, "learning_rate": 5.631761442441055e-06, "loss": 0.0137, "step": 6309 }, { "epoch": 4.3713197090405265, "grad_norm": 0.5007210969924927, "learning_rate": 5.631067961165049e-06, "loss": 0.0117, "step": 6310 }, { "epoch": 4.372012469691722, "grad_norm": 0.6799827218055725, "learning_rate": 5.630374479889044e-06, "loss": 0.0161, "step": 6311 }, { "epoch": 4.372705230342916, "grad_norm": 0.4871334433555603, "learning_rate": 5.629680998613037e-06, "loss": 0.0127, "step": 6312 }, { "epoch": 4.373397990994111, "grad_norm": 0.3891102373600006, "learning_rate": 5.628987517337032e-06, "loss": 0.01, "step": 6313 }, { "epoch": 4.374090751645307, "grad_norm": 0.4211445152759552, "learning_rate": 5.628294036061027e-06, "loss": 0.0132, "step": 6314 }, { "epoch": 4.374783512296502, "grad_norm": 0.5274110436439514, "learning_rate": 5.627600554785021e-06, "loss": 0.0112, "step": 6315 }, { "epoch": 4.375476272947696, "grad_norm": 0.7541940212249756, "learning_rate": 5.626907073509016e-06, "loss": 0.023, "step": 6316 }, { "epoch": 4.376169033598892, "grad_norm": 0.5094373226165771, "learning_rate": 5.62621359223301e-06, "loss": 0.0155, "step": 6317 }, { "epoch": 4.376861794250087, "grad_norm": 0.5532792210578918, "learning_rate": 5.625520110957004e-06, "loss": 0.0174, "step": 6318 }, { "epoch": 4.377554554901281, "grad_norm": 0.5033500790596008, "learning_rate": 5.624826629680999e-06, "loss": 0.0158, "step": 6319 }, { "epoch": 4.378247315552477, "grad_norm": 0.3506750464439392, "learning_rate": 5.624133148404993e-06, "loss": 0.011, "step": 6320 }, { "epoch": 4.378940076203672, "grad_norm": 0.7826740741729736, "learning_rate": 5.623439667128988e-06, "loss": 0.0263, "step": 6321 }, { "epoch": 4.379632836854866, "grad_norm": 0.5336676239967346, "learning_rate": 5.622746185852982e-06, "loss": 0.0159, "step": 6322 }, { "epoch": 4.3803255975060615, "grad_norm": 0.6048029661178589, "learning_rate": 5.622052704576977e-06, "loss": 0.016, "step": 6323 }, { "epoch": 4.381018358157257, "grad_norm": 0.4789120554924011, "learning_rate": 5.621359223300972e-06, "loss": 0.0192, "step": 6324 }, { "epoch": 4.381711118808452, "grad_norm": 0.38032591342926025, "learning_rate": 5.620665742024965e-06, "loss": 0.0118, "step": 6325 }, { "epoch": 4.3824038794596465, "grad_norm": 0.46888935565948486, "learning_rate": 5.61997226074896e-06, "loss": 0.0169, "step": 6326 }, { "epoch": 4.383096640110842, "grad_norm": 0.6135951280593872, "learning_rate": 5.619278779472954e-06, "loss": 0.0153, "step": 6327 }, { "epoch": 4.383789400762037, "grad_norm": 0.43433281779289246, "learning_rate": 5.618585298196949e-06, "loss": 0.015, "step": 6328 }, { "epoch": 4.384482161413231, "grad_norm": 0.3692700266838074, "learning_rate": 5.617891816920944e-06, "loss": 0.0103, "step": 6329 }, { "epoch": 4.385174922064427, "grad_norm": 0.4402207136154175, "learning_rate": 5.6171983356449374e-06, "loss": 0.0171, "step": 6330 }, { "epoch": 4.385867682715622, "grad_norm": 0.4314385652542114, "learning_rate": 5.616504854368932e-06, "loss": 0.0144, "step": 6331 }, { "epoch": 4.386560443366816, "grad_norm": 0.46023669838905334, "learning_rate": 5.6158113730929264e-06, "loss": 0.0122, "step": 6332 }, { "epoch": 4.387253204018012, "grad_norm": 0.5423911213874817, "learning_rate": 5.615117891816921e-06, "loss": 0.0178, "step": 6333 }, { "epoch": 4.387945964669207, "grad_norm": 0.406222403049469, "learning_rate": 5.614424410540916e-06, "loss": 0.015, "step": 6334 }, { "epoch": 4.388638725320401, "grad_norm": 0.37357595562934875, "learning_rate": 5.61373092926491e-06, "loss": 0.0142, "step": 6335 }, { "epoch": 4.389331485971597, "grad_norm": 0.5193958878517151, "learning_rate": 5.613037447988905e-06, "loss": 0.015, "step": 6336 }, { "epoch": 4.390024246622792, "grad_norm": 0.44530028104782104, "learning_rate": 5.6123439667128986e-06, "loss": 0.0184, "step": 6337 }, { "epoch": 4.390717007273987, "grad_norm": 0.6473617553710938, "learning_rate": 5.6116504854368935e-06, "loss": 0.016, "step": 6338 }, { "epoch": 4.391409767925182, "grad_norm": 0.4191620647907257, "learning_rate": 5.6109570041608884e-06, "loss": 0.0158, "step": 6339 }, { "epoch": 4.392102528576377, "grad_norm": 0.4953300952911377, "learning_rate": 5.6102635228848825e-06, "loss": 0.018, "step": 6340 }, { "epoch": 4.392795289227572, "grad_norm": 0.4880446493625641, "learning_rate": 5.6095700416088775e-06, "loss": 0.0172, "step": 6341 }, { "epoch": 4.3934880498787665, "grad_norm": 0.48921796679496765, "learning_rate": 5.608876560332871e-06, "loss": 0.0114, "step": 6342 }, { "epoch": 4.394180810529962, "grad_norm": 0.6088504791259766, "learning_rate": 5.608183079056866e-06, "loss": 0.0147, "step": 6343 }, { "epoch": 4.394873571181157, "grad_norm": 0.4283059239387512, "learning_rate": 5.6074895977808606e-06, "loss": 0.0152, "step": 6344 }, { "epoch": 4.395566331832352, "grad_norm": 0.4862416088581085, "learning_rate": 5.606796116504855e-06, "loss": 0.0125, "step": 6345 }, { "epoch": 4.396259092483547, "grad_norm": 0.506998598575592, "learning_rate": 5.60610263522885e-06, "loss": 0.0159, "step": 6346 }, { "epoch": 4.396951853134742, "grad_norm": 0.6488572359085083, "learning_rate": 5.605409153952843e-06, "loss": 0.0156, "step": 6347 }, { "epoch": 4.397644613785937, "grad_norm": 0.6373663544654846, "learning_rate": 5.604715672676838e-06, "loss": 0.0163, "step": 6348 }, { "epoch": 4.398337374437132, "grad_norm": 0.6178373694419861, "learning_rate": 5.604022191400833e-06, "loss": 0.0239, "step": 6349 }, { "epoch": 4.399030135088327, "grad_norm": 0.464708536863327, "learning_rate": 5.603328710124827e-06, "loss": 0.0148, "step": 6350 }, { "epoch": 4.399722895739522, "grad_norm": 0.40016040205955505, "learning_rate": 5.602635228848822e-06, "loss": 0.0123, "step": 6351 }, { "epoch": 4.400415656390717, "grad_norm": 0.5595857501029968, "learning_rate": 5.601941747572816e-06, "loss": 0.0156, "step": 6352 }, { "epoch": 4.401108417041912, "grad_norm": 0.3771488666534424, "learning_rate": 5.601248266296811e-06, "loss": 0.0126, "step": 6353 }, { "epoch": 4.401801177693107, "grad_norm": 0.3650773763656616, "learning_rate": 5.600554785020806e-06, "loss": 0.0136, "step": 6354 }, { "epoch": 4.402493938344302, "grad_norm": 0.5128456354141235, "learning_rate": 5.599861303744799e-06, "loss": 0.0147, "step": 6355 }, { "epoch": 4.403186698995497, "grad_norm": 0.4283827245235443, "learning_rate": 5.599167822468794e-06, "loss": 0.0182, "step": 6356 }, { "epoch": 4.403879459646692, "grad_norm": 0.46825936436653137, "learning_rate": 5.598474341192788e-06, "loss": 0.0129, "step": 6357 }, { "epoch": 4.404572220297887, "grad_norm": 0.6075336933135986, "learning_rate": 5.597780859916783e-06, "loss": 0.0207, "step": 6358 }, { "epoch": 4.405264980949082, "grad_norm": 0.42504626512527466, "learning_rate": 5.597087378640778e-06, "loss": 0.0208, "step": 6359 }, { "epoch": 4.405957741600277, "grad_norm": 0.3802050054073334, "learning_rate": 5.596393897364771e-06, "loss": 0.0119, "step": 6360 }, { "epoch": 4.406650502251472, "grad_norm": 0.42043280601501465, "learning_rate": 5.595700416088766e-06, "loss": 0.0133, "step": 6361 }, { "epoch": 4.407343262902667, "grad_norm": 0.409045934677124, "learning_rate": 5.59500693481276e-06, "loss": 0.0125, "step": 6362 }, { "epoch": 4.408036023553862, "grad_norm": 0.4809107482433319, "learning_rate": 5.594313453536755e-06, "loss": 0.0152, "step": 6363 }, { "epoch": 4.408728784205057, "grad_norm": 0.5782892107963562, "learning_rate": 5.59361997226075e-06, "loss": 0.0189, "step": 6364 }, { "epoch": 4.409421544856253, "grad_norm": 0.4007456600666046, "learning_rate": 5.592926490984744e-06, "loss": 0.0127, "step": 6365 }, { "epoch": 4.410114305507447, "grad_norm": 0.5185883045196533, "learning_rate": 5.592233009708738e-06, "loss": 0.0188, "step": 6366 }, { "epoch": 4.410807066158642, "grad_norm": 0.5212296843528748, "learning_rate": 5.591539528432732e-06, "loss": 0.017, "step": 6367 }, { "epoch": 4.4114998268098375, "grad_norm": 0.5395380258560181, "learning_rate": 5.590846047156727e-06, "loss": 0.0148, "step": 6368 }, { "epoch": 4.412192587461032, "grad_norm": 0.467286616563797, "learning_rate": 5.590152565880722e-06, "loss": 0.0175, "step": 6369 }, { "epoch": 4.412885348112227, "grad_norm": 0.45846062898635864, "learning_rate": 5.589459084604716e-06, "loss": 0.0139, "step": 6370 }, { "epoch": 4.4135781087634225, "grad_norm": 0.4456712603569031, "learning_rate": 5.588765603328711e-06, "loss": 0.0149, "step": 6371 }, { "epoch": 4.414270869414617, "grad_norm": 0.3855138123035431, "learning_rate": 5.588072122052704e-06, "loss": 0.0146, "step": 6372 }, { "epoch": 4.414963630065812, "grad_norm": 0.446366548538208, "learning_rate": 5.587378640776699e-06, "loss": 0.014, "step": 6373 }, { "epoch": 4.415656390717007, "grad_norm": 0.49780362844467163, "learning_rate": 5.586685159500694e-06, "loss": 0.0137, "step": 6374 }, { "epoch": 4.416349151368202, "grad_norm": 0.5019102692604065, "learning_rate": 5.585991678224688e-06, "loss": 0.0153, "step": 6375 }, { "epoch": 4.417041912019397, "grad_norm": 0.442880779504776, "learning_rate": 5.585298196948683e-06, "loss": 0.0154, "step": 6376 }, { "epoch": 4.417734672670592, "grad_norm": 0.5280331373214722, "learning_rate": 5.5846047156726764e-06, "loss": 0.0178, "step": 6377 }, { "epoch": 4.418427433321788, "grad_norm": 0.5102010369300842, "learning_rate": 5.583911234396671e-06, "loss": 0.0138, "step": 6378 }, { "epoch": 4.419120193972982, "grad_norm": 0.3986343443393707, "learning_rate": 5.583217753120666e-06, "loss": 0.014, "step": 6379 }, { "epoch": 4.419812954624177, "grad_norm": 0.47398659586906433, "learning_rate": 5.58252427184466e-06, "loss": 0.0132, "step": 6380 }, { "epoch": 4.420505715275373, "grad_norm": 0.38692718744277954, "learning_rate": 5.581830790568655e-06, "loss": 0.0145, "step": 6381 }, { "epoch": 4.421198475926567, "grad_norm": 0.48547956347465515, "learning_rate": 5.581137309292649e-06, "loss": 0.0185, "step": 6382 }, { "epoch": 4.421891236577762, "grad_norm": 0.37141096591949463, "learning_rate": 5.580443828016644e-06, "loss": 0.0109, "step": 6383 }, { "epoch": 4.422583997228958, "grad_norm": 0.39800119400024414, "learning_rate": 5.579750346740639e-06, "loss": 0.0091, "step": 6384 }, { "epoch": 4.423276757880153, "grad_norm": 0.3927205204963684, "learning_rate": 5.5790568654646325e-06, "loss": 0.0122, "step": 6385 }, { "epoch": 4.423969518531347, "grad_norm": 0.463717520236969, "learning_rate": 5.5783633841886274e-06, "loss": 0.0168, "step": 6386 }, { "epoch": 4.4246622791825425, "grad_norm": 0.43868792057037354, "learning_rate": 5.5776699029126215e-06, "loss": 0.0184, "step": 6387 }, { "epoch": 4.425355039833738, "grad_norm": 0.3682408034801483, "learning_rate": 5.5769764216366165e-06, "loss": 0.0123, "step": 6388 }, { "epoch": 4.426047800484932, "grad_norm": 0.6563029289245605, "learning_rate": 5.576282940360611e-06, "loss": 0.0244, "step": 6389 }, { "epoch": 4.4267405611361275, "grad_norm": 0.4049411416053772, "learning_rate": 5.575589459084605e-06, "loss": 0.0116, "step": 6390 }, { "epoch": 4.427433321787323, "grad_norm": 0.5479726195335388, "learning_rate": 5.5748959778085996e-06, "loss": 0.0135, "step": 6391 }, { "epoch": 4.428126082438517, "grad_norm": 0.6239023208618164, "learning_rate": 5.574202496532594e-06, "loss": 0.0172, "step": 6392 }, { "epoch": 4.428818843089712, "grad_norm": 0.542202889919281, "learning_rate": 5.573509015256589e-06, "loss": 0.0189, "step": 6393 }, { "epoch": 4.429511603740908, "grad_norm": 0.470120370388031, "learning_rate": 5.5728155339805835e-06, "loss": 0.0158, "step": 6394 }, { "epoch": 4.430204364392102, "grad_norm": 0.6321980357170105, "learning_rate": 5.572122052704577e-06, "loss": 0.0153, "step": 6395 }, { "epoch": 4.430897125043297, "grad_norm": 0.40127214789390564, "learning_rate": 5.571428571428572e-06, "loss": 0.0164, "step": 6396 }, { "epoch": 4.431589885694493, "grad_norm": 0.3545286953449249, "learning_rate": 5.570735090152566e-06, "loss": 0.0143, "step": 6397 }, { "epoch": 4.432282646345688, "grad_norm": 0.5121960639953613, "learning_rate": 5.570041608876561e-06, "loss": 0.0138, "step": 6398 }, { "epoch": 4.432975406996882, "grad_norm": 0.4815506637096405, "learning_rate": 5.569348127600556e-06, "loss": 0.0146, "step": 6399 }, { "epoch": 4.433668167648078, "grad_norm": 0.4875757694244385, "learning_rate": 5.56865464632455e-06, "loss": 0.0148, "step": 6400 }, { "epoch": 4.434360928299273, "grad_norm": 0.5487003922462463, "learning_rate": 5.567961165048545e-06, "loss": 0.0163, "step": 6401 }, { "epoch": 4.435053688950467, "grad_norm": 0.43102291226387024, "learning_rate": 5.567267683772538e-06, "loss": 0.0162, "step": 6402 }, { "epoch": 4.4357464496016625, "grad_norm": 0.4879804253578186, "learning_rate": 5.566574202496533e-06, "loss": 0.0203, "step": 6403 }, { "epoch": 4.436439210252858, "grad_norm": 0.49057435989379883, "learning_rate": 5.565880721220528e-06, "loss": 0.0116, "step": 6404 }, { "epoch": 4.437131970904053, "grad_norm": 0.49575570225715637, "learning_rate": 5.565187239944522e-06, "loss": 0.0156, "step": 6405 }, { "epoch": 4.4378247315552475, "grad_norm": 0.516643762588501, "learning_rate": 5.564493758668517e-06, "loss": 0.0208, "step": 6406 }, { "epoch": 4.438517492206443, "grad_norm": 0.3851929306983948, "learning_rate": 5.56380027739251e-06, "loss": 0.0166, "step": 6407 }, { "epoch": 4.439210252857638, "grad_norm": 0.4506097137928009, "learning_rate": 5.563106796116505e-06, "loss": 0.0159, "step": 6408 }, { "epoch": 4.439903013508832, "grad_norm": 0.3984452486038208, "learning_rate": 5.5624133148405e-06, "loss": 0.0118, "step": 6409 }, { "epoch": 4.440595774160028, "grad_norm": 0.5958320498466492, "learning_rate": 5.561719833564494e-06, "loss": 0.0127, "step": 6410 }, { "epoch": 4.441288534811223, "grad_norm": 0.4138931632041931, "learning_rate": 5.561026352288489e-06, "loss": 0.0117, "step": 6411 }, { "epoch": 4.441981295462417, "grad_norm": 0.5055581331253052, "learning_rate": 5.560332871012483e-06, "loss": 0.0178, "step": 6412 }, { "epoch": 4.442674056113613, "grad_norm": 0.4865836799144745, "learning_rate": 5.559639389736478e-06, "loss": 0.0151, "step": 6413 }, { "epoch": 4.443366816764808, "grad_norm": 0.6675582528114319, "learning_rate": 5.558945908460472e-06, "loss": 0.0194, "step": 6414 }, { "epoch": 4.444059577416002, "grad_norm": 0.4479790925979614, "learning_rate": 5.558252427184466e-06, "loss": 0.019, "step": 6415 }, { "epoch": 4.444752338067198, "grad_norm": 0.5455640554428101, "learning_rate": 5.557558945908461e-06, "loss": 0.0196, "step": 6416 }, { "epoch": 4.445445098718393, "grad_norm": 0.47233420610427856, "learning_rate": 5.556865464632455e-06, "loss": 0.0132, "step": 6417 }, { "epoch": 4.446137859369588, "grad_norm": 0.4319811463356018, "learning_rate": 5.55617198335645e-06, "loss": 0.0152, "step": 6418 }, { "epoch": 4.4468306200207826, "grad_norm": 0.4056445062160492, "learning_rate": 5.555478502080445e-06, "loss": 0.0138, "step": 6419 }, { "epoch": 4.447523380671978, "grad_norm": 0.7867854833602905, "learning_rate": 5.554785020804438e-06, "loss": 0.0203, "step": 6420 }, { "epoch": 4.448216141323173, "grad_norm": 0.4006311893463135, "learning_rate": 5.554091539528433e-06, "loss": 0.0147, "step": 6421 }, { "epoch": 4.4489089019743675, "grad_norm": 0.40011733770370483, "learning_rate": 5.553398058252427e-06, "loss": 0.0177, "step": 6422 }, { "epoch": 4.449601662625563, "grad_norm": 0.5467776656150818, "learning_rate": 5.552704576976422e-06, "loss": 0.02, "step": 6423 }, { "epoch": 4.450294423276758, "grad_norm": 0.47469064593315125, "learning_rate": 5.552011095700417e-06, "loss": 0.0115, "step": 6424 }, { "epoch": 4.450987183927953, "grad_norm": 0.4306032359600067, "learning_rate": 5.55131761442441e-06, "loss": 0.0183, "step": 6425 }, { "epoch": 4.451679944579148, "grad_norm": 0.42458483576774597, "learning_rate": 5.550624133148405e-06, "loss": 0.0116, "step": 6426 }, { "epoch": 4.452372705230343, "grad_norm": 0.4996929168701172, "learning_rate": 5.549930651872399e-06, "loss": 0.0188, "step": 6427 }, { "epoch": 4.453065465881538, "grad_norm": 0.41575998067855835, "learning_rate": 5.549237170596394e-06, "loss": 0.0165, "step": 6428 }, { "epoch": 4.453758226532733, "grad_norm": 0.3469735085964203, "learning_rate": 5.548543689320389e-06, "loss": 0.0089, "step": 6429 }, { "epoch": 4.454450987183928, "grad_norm": 0.4437747597694397, "learning_rate": 5.547850208044383e-06, "loss": 0.016, "step": 6430 }, { "epoch": 4.455143747835123, "grad_norm": 0.5774936676025391, "learning_rate": 5.547156726768378e-06, "loss": 0.0272, "step": 6431 }, { "epoch": 4.455836508486318, "grad_norm": 0.47543248534202576, "learning_rate": 5.5464632454923715e-06, "loss": 0.0122, "step": 6432 }, { "epoch": 4.456529269137513, "grad_norm": 0.487699955701828, "learning_rate": 5.5457697642163664e-06, "loss": 0.0179, "step": 6433 }, { "epoch": 4.457222029788708, "grad_norm": 0.4815370738506317, "learning_rate": 5.545076282940361e-06, "loss": 0.0157, "step": 6434 }, { "epoch": 4.457914790439903, "grad_norm": 0.5126708745956421, "learning_rate": 5.5443828016643555e-06, "loss": 0.0149, "step": 6435 }, { "epoch": 4.458607551091098, "grad_norm": 0.5073757171630859, "learning_rate": 5.54368932038835e-06, "loss": 0.0144, "step": 6436 }, { "epoch": 4.459300311742293, "grad_norm": 0.4443979263305664, "learning_rate": 5.542995839112344e-06, "loss": 0.0155, "step": 6437 }, { "epoch": 4.459993072393488, "grad_norm": 0.34232097864151, "learning_rate": 5.5423023578363386e-06, "loss": 0.0096, "step": 6438 }, { "epoch": 4.460685833044683, "grad_norm": 0.5201405882835388, "learning_rate": 5.5416088765603335e-06, "loss": 0.0172, "step": 6439 }, { "epoch": 4.461378593695878, "grad_norm": 0.3482005298137665, "learning_rate": 5.540915395284328e-06, "loss": 0.0118, "step": 6440 }, { "epoch": 4.462071354347073, "grad_norm": 0.720801591873169, "learning_rate": 5.5402219140083225e-06, "loss": 0.0155, "step": 6441 }, { "epoch": 4.462764114998268, "grad_norm": 0.38182923197746277, "learning_rate": 5.539528432732317e-06, "loss": 0.0179, "step": 6442 }, { "epoch": 4.463456875649463, "grad_norm": 0.375409871339798, "learning_rate": 5.5388349514563115e-06, "loss": 0.0114, "step": 6443 }, { "epoch": 4.464149636300658, "grad_norm": 0.41085493564605713, "learning_rate": 5.538141470180306e-06, "loss": 0.0133, "step": 6444 }, { "epoch": 4.464842396951854, "grad_norm": 0.42861318588256836, "learning_rate": 5.5374479889043e-06, "loss": 0.0152, "step": 6445 }, { "epoch": 4.465535157603048, "grad_norm": 0.4964158535003662, "learning_rate": 5.536754507628295e-06, "loss": 0.0161, "step": 6446 }, { "epoch": 4.466227918254243, "grad_norm": 0.3339213728904724, "learning_rate": 5.536061026352289e-06, "loss": 0.013, "step": 6447 }, { "epoch": 4.4669206789054385, "grad_norm": 0.5683286190032959, "learning_rate": 5.535367545076284e-06, "loss": 0.0189, "step": 6448 }, { "epoch": 4.467613439556633, "grad_norm": 0.7219005227088928, "learning_rate": 5.534674063800279e-06, "loss": 0.0176, "step": 6449 }, { "epoch": 4.468306200207828, "grad_norm": 0.3752721846103668, "learning_rate": 5.533980582524272e-06, "loss": 0.0115, "step": 6450 }, { "epoch": 4.4689989608590235, "grad_norm": 0.5969683527946472, "learning_rate": 5.533287101248267e-06, "loss": 0.0149, "step": 6451 }, { "epoch": 4.469691721510218, "grad_norm": 0.4072873890399933, "learning_rate": 5.532593619972261e-06, "loss": 0.0148, "step": 6452 }, { "epoch": 4.470384482161413, "grad_norm": 0.6940498352050781, "learning_rate": 5.531900138696256e-06, "loss": 0.019, "step": 6453 }, { "epoch": 4.471077242812608, "grad_norm": 0.5742140412330627, "learning_rate": 5.531206657420251e-06, "loss": 0.0138, "step": 6454 }, { "epoch": 4.471770003463803, "grad_norm": 0.526165783405304, "learning_rate": 5.530513176144244e-06, "loss": 0.0163, "step": 6455 }, { "epoch": 4.472462764114998, "grad_norm": 0.4514405131340027, "learning_rate": 5.529819694868239e-06, "loss": 0.0171, "step": 6456 }, { "epoch": 4.473155524766193, "grad_norm": 0.40915215015411377, "learning_rate": 5.529126213592233e-06, "loss": 0.0119, "step": 6457 }, { "epoch": 4.473848285417389, "grad_norm": 0.5459065437316895, "learning_rate": 5.528432732316228e-06, "loss": 0.014, "step": 6458 }, { "epoch": 4.474541046068583, "grad_norm": 0.46588218212127686, "learning_rate": 5.527739251040223e-06, "loss": 0.0123, "step": 6459 }, { "epoch": 4.475233806719778, "grad_norm": 0.41297537088394165, "learning_rate": 5.527045769764217e-06, "loss": 0.0162, "step": 6460 }, { "epoch": 4.475926567370974, "grad_norm": 0.8941429853439331, "learning_rate": 5.526352288488212e-06, "loss": 0.0218, "step": 6461 }, { "epoch": 4.476619328022168, "grad_norm": 0.451428085565567, "learning_rate": 5.525658807212205e-06, "loss": 0.016, "step": 6462 }, { "epoch": 4.477312088673363, "grad_norm": 0.5240730047225952, "learning_rate": 5.5249653259362e-06, "loss": 0.0149, "step": 6463 }, { "epoch": 4.478004849324559, "grad_norm": 0.4977891445159912, "learning_rate": 5.524271844660195e-06, "loss": 0.0148, "step": 6464 }, { "epoch": 4.478697609975754, "grad_norm": 0.6426070928573608, "learning_rate": 5.523578363384189e-06, "loss": 0.014, "step": 6465 }, { "epoch": 4.479390370626948, "grad_norm": 0.5414949655532837, "learning_rate": 5.522884882108184e-06, "loss": 0.0195, "step": 6466 }, { "epoch": 4.4800831312781435, "grad_norm": 0.47537606954574585, "learning_rate": 5.522191400832177e-06, "loss": 0.0134, "step": 6467 }, { "epoch": 4.480775891929339, "grad_norm": 0.5032382607460022, "learning_rate": 5.521497919556172e-06, "loss": 0.0129, "step": 6468 }, { "epoch": 4.481468652580533, "grad_norm": 0.5442067980766296, "learning_rate": 5.520804438280167e-06, "loss": 0.0128, "step": 6469 }, { "epoch": 4.4821614132317285, "grad_norm": 0.526715874671936, "learning_rate": 5.520110957004161e-06, "loss": 0.0181, "step": 6470 }, { "epoch": 4.482854173882924, "grad_norm": 0.6059287786483765, "learning_rate": 5.519417475728156e-06, "loss": 0.0167, "step": 6471 }, { "epoch": 4.483546934534118, "grad_norm": 0.5071312785148621, "learning_rate": 5.51872399445215e-06, "loss": 0.0156, "step": 6472 }, { "epoch": 4.484239695185313, "grad_norm": 0.5615072250366211, "learning_rate": 5.518030513176144e-06, "loss": 0.0188, "step": 6473 }, { "epoch": 4.484932455836509, "grad_norm": 0.37299078702926636, "learning_rate": 5.517337031900139e-06, "loss": 0.0155, "step": 6474 }, { "epoch": 4.485625216487703, "grad_norm": 0.438225120306015, "learning_rate": 5.516643550624133e-06, "loss": 0.0151, "step": 6475 }, { "epoch": 4.486317977138898, "grad_norm": 0.49470794200897217, "learning_rate": 5.515950069348128e-06, "loss": 0.0139, "step": 6476 }, { "epoch": 4.487010737790094, "grad_norm": 0.4833485186100006, "learning_rate": 5.515256588072122e-06, "loss": 0.0175, "step": 6477 }, { "epoch": 4.487703498441289, "grad_norm": 0.42016899585723877, "learning_rate": 5.514563106796117e-06, "loss": 0.0119, "step": 6478 }, { "epoch": 4.488396259092483, "grad_norm": 0.5711908340454102, "learning_rate": 5.513869625520112e-06, "loss": 0.0169, "step": 6479 }, { "epoch": 4.489089019743679, "grad_norm": 0.5124261975288391, "learning_rate": 5.5131761442441054e-06, "loss": 0.0195, "step": 6480 }, { "epoch": 4.489781780394874, "grad_norm": 0.4338620901107788, "learning_rate": 5.5124826629681e-06, "loss": 0.0172, "step": 6481 }, { "epoch": 4.490474541046068, "grad_norm": 0.6072943210601807, "learning_rate": 5.5117891816920945e-06, "loss": 0.0162, "step": 6482 }, { "epoch": 4.4911673016972635, "grad_norm": 0.526709794998169, "learning_rate": 5.511095700416089e-06, "loss": 0.0167, "step": 6483 }, { "epoch": 4.491860062348459, "grad_norm": 0.4411545693874359, "learning_rate": 5.510402219140084e-06, "loss": 0.0153, "step": 6484 }, { "epoch": 4.492552822999654, "grad_norm": 0.5905636548995972, "learning_rate": 5.5097087378640776e-06, "loss": 0.0211, "step": 6485 }, { "epoch": 4.4932455836508485, "grad_norm": 0.48006075620651245, "learning_rate": 5.5090152565880725e-06, "loss": 0.0164, "step": 6486 }, { "epoch": 4.493938344302044, "grad_norm": 0.4034219980239868, "learning_rate": 5.508321775312067e-06, "loss": 0.012, "step": 6487 }, { "epoch": 4.494631104953239, "grad_norm": 0.487667441368103, "learning_rate": 5.5076282940360615e-06, "loss": 0.0113, "step": 6488 }, { "epoch": 4.495323865604433, "grad_norm": 0.48509952425956726, "learning_rate": 5.5069348127600565e-06, "loss": 0.0124, "step": 6489 }, { "epoch": 4.496016626255629, "grad_norm": 0.6027198433876038, "learning_rate": 5.5062413314840505e-06, "loss": 0.0254, "step": 6490 }, { "epoch": 4.496709386906824, "grad_norm": 0.43159589171409607, "learning_rate": 5.5055478502080455e-06, "loss": 0.0168, "step": 6491 }, { "epoch": 4.497402147558018, "grad_norm": 0.5612128376960754, "learning_rate": 5.504854368932039e-06, "loss": 0.0182, "step": 6492 }, { "epoch": 4.498094908209214, "grad_norm": 0.6271775364875793, "learning_rate": 5.504160887656034e-06, "loss": 0.0164, "step": 6493 }, { "epoch": 4.498787668860409, "grad_norm": 0.5226729512214661, "learning_rate": 5.503467406380029e-06, "loss": 0.0168, "step": 6494 }, { "epoch": 4.499480429511603, "grad_norm": 0.5027127265930176, "learning_rate": 5.502773925104023e-06, "loss": 0.0167, "step": 6495 }, { "epoch": 4.500173190162799, "grad_norm": 0.44412973523139954, "learning_rate": 5.502080443828018e-06, "loss": 0.0139, "step": 6496 }, { "epoch": 4.500865950813994, "grad_norm": 0.8797883987426758, "learning_rate": 5.501386962552011e-06, "loss": 0.0178, "step": 6497 }, { "epoch": 4.501558711465189, "grad_norm": 0.4727875292301178, "learning_rate": 5.500693481276006e-06, "loss": 0.0125, "step": 6498 }, { "epoch": 4.5022514721163835, "grad_norm": 0.5474968552589417, "learning_rate": 5.500000000000001e-06, "loss": 0.0188, "step": 6499 }, { "epoch": 4.502944232767579, "grad_norm": 0.3545326590538025, "learning_rate": 5.499306518723995e-06, "loss": 0.0113, "step": 6500 }, { "epoch": 4.503636993418774, "grad_norm": 0.48472627997398376, "learning_rate": 5.49861303744799e-06, "loss": 0.0198, "step": 6501 }, { "epoch": 4.5043297540699685, "grad_norm": 0.3662717640399933, "learning_rate": 5.497919556171983e-06, "loss": 0.0111, "step": 6502 }, { "epoch": 4.505022514721164, "grad_norm": 0.4518069326877594, "learning_rate": 5.497226074895978e-06, "loss": 0.0162, "step": 6503 }, { "epoch": 4.505715275372359, "grad_norm": 0.39849138259887695, "learning_rate": 5.496532593619973e-06, "loss": 0.0151, "step": 6504 }, { "epoch": 4.506408036023554, "grad_norm": 0.6232315897941589, "learning_rate": 5.495839112343967e-06, "loss": 0.0123, "step": 6505 }, { "epoch": 4.507100796674749, "grad_norm": 0.6459498405456543, "learning_rate": 5.495145631067962e-06, "loss": 0.018, "step": 6506 }, { "epoch": 4.507793557325944, "grad_norm": 0.504284679889679, "learning_rate": 5.494452149791956e-06, "loss": 0.0232, "step": 6507 }, { "epoch": 4.508486317977139, "grad_norm": 0.46955856680870056, "learning_rate": 5.493758668515951e-06, "loss": 0.0139, "step": 6508 }, { "epoch": 4.509179078628334, "grad_norm": 0.4541158676147461, "learning_rate": 5.493065187239946e-06, "loss": 0.0144, "step": 6509 }, { "epoch": 4.509871839279529, "grad_norm": 0.4640336036682129, "learning_rate": 5.492371705963939e-06, "loss": 0.0189, "step": 6510 }, { "epoch": 4.510564599930724, "grad_norm": 0.4979221522808075, "learning_rate": 5.491678224687934e-06, "loss": 0.0196, "step": 6511 }, { "epoch": 4.511257360581919, "grad_norm": 0.45296889543533325, "learning_rate": 5.490984743411928e-06, "loss": 0.0159, "step": 6512 }, { "epoch": 4.511950121233114, "grad_norm": 0.4820176064968109, "learning_rate": 5.490291262135923e-06, "loss": 0.0129, "step": 6513 }, { "epoch": 4.512642881884309, "grad_norm": 0.4541758894920349, "learning_rate": 5.489597780859918e-06, "loss": 0.0115, "step": 6514 }, { "epoch": 4.513335642535504, "grad_norm": 0.4782460629940033, "learning_rate": 5.488904299583911e-06, "loss": 0.0155, "step": 6515 }, { "epoch": 4.514028403186699, "grad_norm": 0.4050765335559845, "learning_rate": 5.488210818307906e-06, "loss": 0.013, "step": 6516 }, { "epoch": 4.514721163837894, "grad_norm": 0.5570684671401978, "learning_rate": 5.4875173370319e-06, "loss": 0.0171, "step": 6517 }, { "epoch": 4.515413924489089, "grad_norm": 0.3821154832839966, "learning_rate": 5.486823855755895e-06, "loss": 0.016, "step": 6518 }, { "epoch": 4.516106685140284, "grad_norm": 0.44980764389038086, "learning_rate": 5.48613037447989e-06, "loss": 0.0167, "step": 6519 }, { "epoch": 4.516799445791479, "grad_norm": 0.47033196687698364, "learning_rate": 5.485436893203884e-06, "loss": 0.0159, "step": 6520 }, { "epoch": 4.517492206442674, "grad_norm": 0.6494829058647156, "learning_rate": 5.484743411927878e-06, "loss": 0.0184, "step": 6521 }, { "epoch": 4.518184967093869, "grad_norm": 0.43515223264694214, "learning_rate": 5.484049930651872e-06, "loss": 0.0152, "step": 6522 }, { "epoch": 4.518877727745064, "grad_norm": 0.4424087405204773, "learning_rate": 5.483356449375867e-06, "loss": 0.0124, "step": 6523 }, { "epoch": 4.519570488396259, "grad_norm": 0.8619319796562195, "learning_rate": 5.482662968099862e-06, "loss": 0.0182, "step": 6524 }, { "epoch": 4.520263249047455, "grad_norm": 0.5138509273529053, "learning_rate": 5.481969486823856e-06, "loss": 0.0123, "step": 6525 }, { "epoch": 4.520956009698649, "grad_norm": 0.4272022843360901, "learning_rate": 5.481276005547851e-06, "loss": 0.0116, "step": 6526 }, { "epoch": 4.521648770349844, "grad_norm": 0.35826924443244934, "learning_rate": 5.4805825242718444e-06, "loss": 0.0109, "step": 6527 }, { "epoch": 4.5223415310010395, "grad_norm": 0.46223559975624084, "learning_rate": 5.479889042995839e-06, "loss": 0.015, "step": 6528 }, { "epoch": 4.523034291652234, "grad_norm": 0.42442476749420166, "learning_rate": 5.479195561719834e-06, "loss": 0.0138, "step": 6529 }, { "epoch": 4.523727052303429, "grad_norm": 0.4605258107185364, "learning_rate": 5.478502080443828e-06, "loss": 0.0135, "step": 6530 }, { "epoch": 4.5244198129546245, "grad_norm": 0.6113325953483582, "learning_rate": 5.477808599167823e-06, "loss": 0.0199, "step": 6531 }, { "epoch": 4.525112573605819, "grad_norm": 0.6036701202392578, "learning_rate": 5.4771151178918166e-06, "loss": 0.0179, "step": 6532 }, { "epoch": 4.525805334257014, "grad_norm": 0.650604248046875, "learning_rate": 5.4764216366158115e-06, "loss": 0.0153, "step": 6533 }, { "epoch": 4.526498094908209, "grad_norm": 0.663766622543335, "learning_rate": 5.4757281553398064e-06, "loss": 0.0117, "step": 6534 }, { "epoch": 4.527190855559404, "grad_norm": 0.4829460680484772, "learning_rate": 5.4750346740638005e-06, "loss": 0.0165, "step": 6535 }, { "epoch": 4.527883616210599, "grad_norm": 0.4579041004180908, "learning_rate": 5.4743411927877955e-06, "loss": 0.0131, "step": 6536 }, { "epoch": 4.528576376861794, "grad_norm": 0.49057334661483765, "learning_rate": 5.4736477115117895e-06, "loss": 0.013, "step": 6537 }, { "epoch": 4.529269137512989, "grad_norm": 0.4914683699607849, "learning_rate": 5.4729542302357845e-06, "loss": 0.0176, "step": 6538 }, { "epoch": 4.529961898164184, "grad_norm": 0.7309079766273499, "learning_rate": 5.472260748959779e-06, "loss": 0.0174, "step": 6539 }, { "epoch": 4.530654658815379, "grad_norm": 0.5123947858810425, "learning_rate": 5.471567267683773e-06, "loss": 0.0143, "step": 6540 }, { "epoch": 4.531347419466575, "grad_norm": 0.6903588175773621, "learning_rate": 5.470873786407768e-06, "loss": 0.0175, "step": 6541 }, { "epoch": 4.532040180117769, "grad_norm": 0.4115409255027771, "learning_rate": 5.470180305131762e-06, "loss": 0.0139, "step": 6542 }, { "epoch": 4.532732940768964, "grad_norm": 0.5724513530731201, "learning_rate": 5.469486823855757e-06, "loss": 0.0178, "step": 6543 }, { "epoch": 4.5334257014201595, "grad_norm": 0.5521072745323181, "learning_rate": 5.4687933425797515e-06, "loss": 0.0156, "step": 6544 }, { "epoch": 4.534118462071355, "grad_norm": 0.4855845868587494, "learning_rate": 5.468099861303745e-06, "loss": 0.0155, "step": 6545 }, { "epoch": 4.534811222722549, "grad_norm": 0.3734821081161499, "learning_rate": 5.46740638002774e-06, "loss": 0.0112, "step": 6546 }, { "epoch": 4.5355039833737445, "grad_norm": 0.3924940526485443, "learning_rate": 5.466712898751734e-06, "loss": 0.0121, "step": 6547 }, { "epoch": 4.53619674402494, "grad_norm": 0.47242504358291626, "learning_rate": 5.466019417475729e-06, "loss": 0.025, "step": 6548 }, { "epoch": 4.536889504676134, "grad_norm": 0.4202132523059845, "learning_rate": 5.465325936199724e-06, "loss": 0.0159, "step": 6549 }, { "epoch": 4.5375822653273294, "grad_norm": 0.4816703200340271, "learning_rate": 5.464632454923717e-06, "loss": 0.018, "step": 6550 }, { "epoch": 4.538275025978525, "grad_norm": 0.5029601454734802, "learning_rate": 5.463938973647712e-06, "loss": 0.0204, "step": 6551 }, { "epoch": 4.538967786629719, "grad_norm": 0.47813183069229126, "learning_rate": 5.463245492371706e-06, "loss": 0.0176, "step": 6552 }, { "epoch": 4.539660547280914, "grad_norm": 0.48515447974205017, "learning_rate": 5.462552011095701e-06, "loss": 0.0198, "step": 6553 }, { "epoch": 4.54035330793211, "grad_norm": 0.5452256202697754, "learning_rate": 5.461858529819696e-06, "loss": 0.0163, "step": 6554 }, { "epoch": 4.541046068583304, "grad_norm": 0.6523408889770508, "learning_rate": 5.46116504854369e-06, "loss": 0.0219, "step": 6555 }, { "epoch": 4.541738829234499, "grad_norm": 0.42483633756637573, "learning_rate": 5.460471567267685e-06, "loss": 0.016, "step": 6556 }, { "epoch": 4.542431589885695, "grad_norm": 0.4776119589805603, "learning_rate": 5.459778085991678e-06, "loss": 0.015, "step": 6557 }, { "epoch": 4.543124350536889, "grad_norm": 0.45716530084609985, "learning_rate": 5.459084604715673e-06, "loss": 0.0174, "step": 6558 }, { "epoch": 4.543817111188084, "grad_norm": 0.4417690634727478, "learning_rate": 5.458391123439668e-06, "loss": 0.0155, "step": 6559 }, { "epoch": 4.54450987183928, "grad_norm": 0.49368464946746826, "learning_rate": 5.457697642163662e-06, "loss": 0.0154, "step": 6560 }, { "epoch": 4.545202632490475, "grad_norm": 0.5182321667671204, "learning_rate": 5.457004160887657e-06, "loss": 0.0204, "step": 6561 }, { "epoch": 4.545895393141669, "grad_norm": 0.5918089151382446, "learning_rate": 5.45631067961165e-06, "loss": 0.0112, "step": 6562 }, { "epoch": 4.5465881537928645, "grad_norm": 0.6511187553405762, "learning_rate": 5.455617198335645e-06, "loss": 0.0187, "step": 6563 }, { "epoch": 4.54728091444406, "grad_norm": 0.4714299142360687, "learning_rate": 5.45492371705964e-06, "loss": 0.0144, "step": 6564 }, { "epoch": 4.547973675095255, "grad_norm": 0.4289691746234894, "learning_rate": 5.454230235783634e-06, "loss": 0.0143, "step": 6565 }, { "epoch": 4.5486664357464495, "grad_norm": 0.4590403139591217, "learning_rate": 5.453536754507629e-06, "loss": 0.0159, "step": 6566 }, { "epoch": 4.549359196397645, "grad_norm": 0.3824339509010315, "learning_rate": 5.452843273231623e-06, "loss": 0.0115, "step": 6567 }, { "epoch": 4.55005195704884, "grad_norm": 0.44767406582832336, "learning_rate": 5.452149791955618e-06, "loss": 0.015, "step": 6568 }, { "epoch": 4.550744717700034, "grad_norm": 0.5141071677207947, "learning_rate": 5.451456310679612e-06, "loss": 0.0194, "step": 6569 }, { "epoch": 4.55143747835123, "grad_norm": 0.5163344740867615, "learning_rate": 5.450762829403606e-06, "loss": 0.0146, "step": 6570 }, { "epoch": 4.552130239002425, "grad_norm": 0.49274778366088867, "learning_rate": 5.450069348127601e-06, "loss": 0.0167, "step": 6571 }, { "epoch": 4.552822999653619, "grad_norm": 0.4520893096923828, "learning_rate": 5.449375866851595e-06, "loss": 0.0145, "step": 6572 }, { "epoch": 4.553515760304815, "grad_norm": 1.0724546909332275, "learning_rate": 5.44868238557559e-06, "loss": 0.0195, "step": 6573 }, { "epoch": 4.55420852095601, "grad_norm": 0.47798892855644226, "learning_rate": 5.447988904299585e-06, "loss": 0.0152, "step": 6574 }, { "epoch": 4.554901281607204, "grad_norm": 0.580756664276123, "learning_rate": 5.447295423023578e-06, "loss": 0.0116, "step": 6575 }, { "epoch": 4.5555940422584, "grad_norm": 0.43493279814720154, "learning_rate": 5.446601941747573e-06, "loss": 0.0157, "step": 6576 }, { "epoch": 4.556286802909595, "grad_norm": 0.4682376980781555, "learning_rate": 5.445908460471567e-06, "loss": 0.0164, "step": 6577 }, { "epoch": 4.556979563560789, "grad_norm": 0.456667423248291, "learning_rate": 5.445214979195562e-06, "loss": 0.0146, "step": 6578 }, { "epoch": 4.5576723242119845, "grad_norm": 0.4843285083770752, "learning_rate": 5.444521497919557e-06, "loss": 0.0113, "step": 6579 }, { "epoch": 4.55836508486318, "grad_norm": 0.6579593420028687, "learning_rate": 5.4438280166435505e-06, "loss": 0.0183, "step": 6580 }, { "epoch": 4.559057845514375, "grad_norm": 0.6190991997718811, "learning_rate": 5.4431345353675454e-06, "loss": 0.0163, "step": 6581 }, { "epoch": 4.5597506061655695, "grad_norm": 0.44875380396842957, "learning_rate": 5.4424410540915395e-06, "loss": 0.0142, "step": 6582 }, { "epoch": 4.560443366816765, "grad_norm": 0.5786736011505127, "learning_rate": 5.4417475728155345e-06, "loss": 0.0189, "step": 6583 }, { "epoch": 4.56113612746796, "grad_norm": 0.4336460828781128, "learning_rate": 5.441054091539529e-06, "loss": 0.0159, "step": 6584 }, { "epoch": 4.561828888119155, "grad_norm": 0.5271535515785217, "learning_rate": 5.4403606102635235e-06, "loss": 0.0187, "step": 6585 }, { "epoch": 4.56252164877035, "grad_norm": 0.4854520857334137, "learning_rate": 5.439667128987518e-06, "loss": 0.0153, "step": 6586 }, { "epoch": 4.563214409421545, "grad_norm": 0.4231073260307312, "learning_rate": 5.438973647711512e-06, "loss": 0.0184, "step": 6587 }, { "epoch": 4.56390717007274, "grad_norm": 0.5371063351631165, "learning_rate": 5.438280166435507e-06, "loss": 0.0148, "step": 6588 }, { "epoch": 4.564599930723935, "grad_norm": 0.4960605800151825, "learning_rate": 5.4375866851595015e-06, "loss": 0.0182, "step": 6589 }, { "epoch": 4.56529269137513, "grad_norm": 0.7578689455986023, "learning_rate": 5.436893203883496e-06, "loss": 0.0229, "step": 6590 }, { "epoch": 4.565985452026325, "grad_norm": 0.520584762096405, "learning_rate": 5.4361997226074905e-06, "loss": 0.0197, "step": 6591 }, { "epoch": 4.56667821267752, "grad_norm": 0.6162667274475098, "learning_rate": 5.435506241331484e-06, "loss": 0.0179, "step": 6592 }, { "epoch": 4.567370973328715, "grad_norm": 0.7186487317085266, "learning_rate": 5.434812760055479e-06, "loss": 0.0181, "step": 6593 }, { "epoch": 4.56806373397991, "grad_norm": 0.4948159158229828, "learning_rate": 5.434119278779474e-06, "loss": 0.0153, "step": 6594 }, { "epoch": 4.568756494631105, "grad_norm": 0.574824869632721, "learning_rate": 5.433425797503468e-06, "loss": 0.0178, "step": 6595 }, { "epoch": 4.5694492552823, "grad_norm": 0.4070855975151062, "learning_rate": 5.432732316227463e-06, "loss": 0.0135, "step": 6596 }, { "epoch": 4.570142015933495, "grad_norm": 0.4459018111228943, "learning_rate": 5.432038834951457e-06, "loss": 0.0109, "step": 6597 }, { "epoch": 4.5708347765846895, "grad_norm": 0.48724040389060974, "learning_rate": 5.431345353675451e-06, "loss": 0.0154, "step": 6598 }, { "epoch": 4.571527537235885, "grad_norm": 0.43888628482818604, "learning_rate": 5.430651872399446e-06, "loss": 0.014, "step": 6599 }, { "epoch": 4.57222029788708, "grad_norm": 0.5620042681694031, "learning_rate": 5.42995839112344e-06, "loss": 0.0194, "step": 6600 }, { "epoch": 4.572913058538275, "grad_norm": 0.499756395816803, "learning_rate": 5.429264909847435e-06, "loss": 0.0161, "step": 6601 }, { "epoch": 4.57360581918947, "grad_norm": 0.5070227980613708, "learning_rate": 5.428571428571429e-06, "loss": 0.0158, "step": 6602 }, { "epoch": 4.574298579840665, "grad_norm": 0.5882565379142761, "learning_rate": 5.427877947295424e-06, "loss": 0.0198, "step": 6603 }, { "epoch": 4.57499134049186, "grad_norm": 0.4141419231891632, "learning_rate": 5.427184466019419e-06, "loss": 0.0147, "step": 6604 }, { "epoch": 4.575684101143056, "grad_norm": 0.372179239988327, "learning_rate": 5.426490984743412e-06, "loss": 0.0127, "step": 6605 }, { "epoch": 4.57637686179425, "grad_norm": 0.5299352407455444, "learning_rate": 5.425797503467407e-06, "loss": 0.0261, "step": 6606 }, { "epoch": 4.577069622445445, "grad_norm": 0.7036036252975464, "learning_rate": 5.425104022191401e-06, "loss": 0.0183, "step": 6607 }, { "epoch": 4.5777623830966405, "grad_norm": 0.3718984127044678, "learning_rate": 5.424410540915396e-06, "loss": 0.0159, "step": 6608 }, { "epoch": 4.578455143747835, "grad_norm": 0.5675950646400452, "learning_rate": 5.423717059639391e-06, "loss": 0.0136, "step": 6609 }, { "epoch": 4.57914790439903, "grad_norm": 0.5094320178031921, "learning_rate": 5.423023578363384e-06, "loss": 0.0148, "step": 6610 }, { "epoch": 4.5798406650502255, "grad_norm": 0.5102602243423462, "learning_rate": 5.422330097087379e-06, "loss": 0.0164, "step": 6611 }, { "epoch": 4.58053342570142, "grad_norm": 0.5119156241416931, "learning_rate": 5.421636615811373e-06, "loss": 0.0211, "step": 6612 }, { "epoch": 4.581226186352615, "grad_norm": 0.49560511112213135, "learning_rate": 5.420943134535368e-06, "loss": 0.0138, "step": 6613 }, { "epoch": 4.58191894700381, "grad_norm": 0.5669496655464172, "learning_rate": 5.420249653259363e-06, "loss": 0.0251, "step": 6614 }, { "epoch": 4.582611707655005, "grad_norm": 0.5031467080116272, "learning_rate": 5.419556171983357e-06, "loss": 0.0157, "step": 6615 }, { "epoch": 4.5833044683062, "grad_norm": 0.5215955376625061, "learning_rate": 5.418862690707352e-06, "loss": 0.0184, "step": 6616 }, { "epoch": 4.583997228957395, "grad_norm": 0.4663569927215576, "learning_rate": 5.418169209431345e-06, "loss": 0.0153, "step": 6617 }, { "epoch": 4.58468998960859, "grad_norm": 0.5357926487922668, "learning_rate": 5.41747572815534e-06, "loss": 0.0257, "step": 6618 }, { "epoch": 4.585382750259785, "grad_norm": 0.8061259388923645, "learning_rate": 5.416782246879335e-06, "loss": 0.0189, "step": 6619 }, { "epoch": 4.58607551091098, "grad_norm": 0.47571200132369995, "learning_rate": 5.416088765603329e-06, "loss": 0.0147, "step": 6620 }, { "epoch": 4.586768271562176, "grad_norm": 0.5356894731521606, "learning_rate": 5.415395284327324e-06, "loss": 0.0227, "step": 6621 }, { "epoch": 4.58746103221337, "grad_norm": 0.556513249874115, "learning_rate": 5.414701803051317e-06, "loss": 0.0214, "step": 6622 }, { "epoch": 4.588153792864565, "grad_norm": 0.680359423160553, "learning_rate": 5.414008321775312e-06, "loss": 0.0167, "step": 6623 }, { "epoch": 4.5888465535157605, "grad_norm": 0.512295126914978, "learning_rate": 5.413314840499307e-06, "loss": 0.0175, "step": 6624 }, { "epoch": 4.589539314166955, "grad_norm": 0.5296668410301208, "learning_rate": 5.412621359223301e-06, "loss": 0.0173, "step": 6625 }, { "epoch": 4.59023207481815, "grad_norm": 0.40958574414253235, "learning_rate": 5.411927877947296e-06, "loss": 0.0129, "step": 6626 }, { "epoch": 4.5909248354693455, "grad_norm": 0.4297935962677002, "learning_rate": 5.4112343966712895e-06, "loss": 0.0125, "step": 6627 }, { "epoch": 4.591617596120541, "grad_norm": 0.4897463619709015, "learning_rate": 5.4105409153952844e-06, "loss": 0.0173, "step": 6628 }, { "epoch": 4.592310356771735, "grad_norm": 0.40074458718299866, "learning_rate": 5.409847434119279e-06, "loss": 0.0166, "step": 6629 }, { "epoch": 4.59300311742293, "grad_norm": 1.348474144935608, "learning_rate": 5.4091539528432735e-06, "loss": 0.0195, "step": 6630 }, { "epoch": 4.593695878074126, "grad_norm": 0.5647010803222656, "learning_rate": 5.408460471567268e-06, "loss": 0.0231, "step": 6631 }, { "epoch": 4.59438863872532, "grad_norm": 0.43581849336624146, "learning_rate": 5.4077669902912625e-06, "loss": 0.0136, "step": 6632 }, { "epoch": 4.595081399376515, "grad_norm": 0.46688055992126465, "learning_rate": 5.407073509015257e-06, "loss": 0.0168, "step": 6633 }, { "epoch": 4.595774160027711, "grad_norm": 0.5506585836410522, "learning_rate": 5.406380027739252e-06, "loss": 0.0161, "step": 6634 }, { "epoch": 4.596466920678905, "grad_norm": 0.3996039927005768, "learning_rate": 5.405686546463246e-06, "loss": 0.0139, "step": 6635 }, { "epoch": 4.5971596813301, "grad_norm": 0.4879164397716522, "learning_rate": 5.4049930651872405e-06, "loss": 0.0178, "step": 6636 }, { "epoch": 4.597852441981296, "grad_norm": 0.7078427672386169, "learning_rate": 5.404299583911235e-06, "loss": 0.0154, "step": 6637 }, { "epoch": 4.59854520263249, "grad_norm": 0.7090429067611694, "learning_rate": 5.4036061026352295e-06, "loss": 0.015, "step": 6638 }, { "epoch": 4.599237963283685, "grad_norm": 0.43521860241889954, "learning_rate": 5.4029126213592245e-06, "loss": 0.0129, "step": 6639 }, { "epoch": 4.599930723934881, "grad_norm": 0.48858875036239624, "learning_rate": 5.402219140083218e-06, "loss": 0.0189, "step": 6640 }, { "epoch": 4.600623484586076, "grad_norm": 0.5457018613815308, "learning_rate": 5.401525658807213e-06, "loss": 0.0164, "step": 6641 }, { "epoch": 4.60131624523727, "grad_norm": 0.445328950881958, "learning_rate": 5.400832177531207e-06, "loss": 0.0118, "step": 6642 }, { "epoch": 4.6020090058884655, "grad_norm": 0.4922443628311157, "learning_rate": 5.400138696255202e-06, "loss": 0.0161, "step": 6643 }, { "epoch": 4.602701766539661, "grad_norm": 0.584332287311554, "learning_rate": 5.399445214979197e-06, "loss": 0.0181, "step": 6644 }, { "epoch": 4.603394527190855, "grad_norm": 0.41912952065467834, "learning_rate": 5.398751733703191e-06, "loss": 0.0125, "step": 6645 }, { "epoch": 4.6040872878420505, "grad_norm": 0.503561794757843, "learning_rate": 5.398058252427185e-06, "loss": 0.0234, "step": 6646 }, { "epoch": 4.604780048493246, "grad_norm": 0.4105199873447418, "learning_rate": 5.397364771151179e-06, "loss": 0.0105, "step": 6647 }, { "epoch": 4.605472809144441, "grad_norm": 0.478679358959198, "learning_rate": 5.396671289875174e-06, "loss": 0.0177, "step": 6648 }, { "epoch": 4.606165569795635, "grad_norm": 0.428882896900177, "learning_rate": 5.395977808599169e-06, "loss": 0.0155, "step": 6649 }, { "epoch": 4.606858330446831, "grad_norm": 0.48732271790504456, "learning_rate": 5.395284327323163e-06, "loss": 0.0186, "step": 6650 }, { "epoch": 4.607551091098026, "grad_norm": 0.3989153206348419, "learning_rate": 5.394590846047158e-06, "loss": 0.0135, "step": 6651 }, { "epoch": 4.60824385174922, "grad_norm": 0.396588534116745, "learning_rate": 5.393897364771151e-06, "loss": 0.014, "step": 6652 }, { "epoch": 4.608936612400416, "grad_norm": 0.46881407499313354, "learning_rate": 5.393203883495146e-06, "loss": 0.0178, "step": 6653 }, { "epoch": 4.609629373051611, "grad_norm": 0.42429134249687195, "learning_rate": 5.392510402219141e-06, "loss": 0.0131, "step": 6654 }, { "epoch": 4.610322133702805, "grad_norm": 0.4902574419975281, "learning_rate": 5.391816920943135e-06, "loss": 0.0131, "step": 6655 }, { "epoch": 4.611014894354001, "grad_norm": 0.4487420618534088, "learning_rate": 5.39112343966713e-06, "loss": 0.014, "step": 6656 }, { "epoch": 4.611707655005196, "grad_norm": 0.5300506353378296, "learning_rate": 5.390429958391123e-06, "loss": 0.0121, "step": 6657 }, { "epoch": 4.61240041565639, "grad_norm": 0.5802969336509705, "learning_rate": 5.389736477115118e-06, "loss": 0.0151, "step": 6658 }, { "epoch": 4.6130931763075855, "grad_norm": 0.6159478425979614, "learning_rate": 5.389042995839113e-06, "loss": 0.0166, "step": 6659 }, { "epoch": 4.613785936958781, "grad_norm": 0.36077210307121277, "learning_rate": 5.388349514563107e-06, "loss": 0.013, "step": 6660 }, { "epoch": 4.614478697609976, "grad_norm": 0.44819772243499756, "learning_rate": 5.387656033287102e-06, "loss": 0.0119, "step": 6661 }, { "epoch": 4.6151714582611705, "grad_norm": 0.7619090676307678, "learning_rate": 5.386962552011096e-06, "loss": 0.0168, "step": 6662 }, { "epoch": 4.615864218912366, "grad_norm": 0.5988100171089172, "learning_rate": 5.386269070735091e-06, "loss": 0.024, "step": 6663 }, { "epoch": 4.616556979563561, "grad_norm": 0.5031140446662903, "learning_rate": 5.385575589459086e-06, "loss": 0.0181, "step": 6664 }, { "epoch": 4.617249740214755, "grad_norm": 0.38109642267227173, "learning_rate": 5.384882108183079e-06, "loss": 0.0149, "step": 6665 }, { "epoch": 4.617942500865951, "grad_norm": 0.5986095070838928, "learning_rate": 5.384188626907074e-06, "loss": 0.0156, "step": 6666 }, { "epoch": 4.618635261517146, "grad_norm": 0.44961240887641907, "learning_rate": 5.383495145631068e-06, "loss": 0.0176, "step": 6667 }, { "epoch": 4.619328022168341, "grad_norm": 0.4712059795856476, "learning_rate": 5.382801664355063e-06, "loss": 0.0216, "step": 6668 }, { "epoch": 4.620020782819536, "grad_norm": 0.39034727215766907, "learning_rate": 5.382108183079058e-06, "loss": 0.0119, "step": 6669 }, { "epoch": 4.620713543470731, "grad_norm": 0.5380370020866394, "learning_rate": 5.381414701803051e-06, "loss": 0.0142, "step": 6670 }, { "epoch": 4.621406304121926, "grad_norm": 0.45525482296943665, "learning_rate": 5.380721220527046e-06, "loss": 0.0156, "step": 6671 }, { "epoch": 4.622099064773121, "grad_norm": 0.5401086211204529, "learning_rate": 5.38002773925104e-06, "loss": 0.0154, "step": 6672 }, { "epoch": 4.622791825424316, "grad_norm": 0.5880857110023499, "learning_rate": 5.379334257975035e-06, "loss": 0.0154, "step": 6673 }, { "epoch": 4.623484586075511, "grad_norm": 0.5571756362915039, "learning_rate": 5.37864077669903e-06, "loss": 0.0185, "step": 6674 }, { "epoch": 4.6241773467267056, "grad_norm": 0.4018222391605377, "learning_rate": 5.3779472954230234e-06, "loss": 0.0124, "step": 6675 }, { "epoch": 4.624870107377901, "grad_norm": 0.4577995538711548, "learning_rate": 5.377253814147018e-06, "loss": 0.0126, "step": 6676 }, { "epoch": 4.625562868029096, "grad_norm": 0.5859726071357727, "learning_rate": 5.3765603328710125e-06, "loss": 0.0167, "step": 6677 }, { "epoch": 4.6262556286802905, "grad_norm": 0.44329333305358887, "learning_rate": 5.375866851595007e-06, "loss": 0.0113, "step": 6678 }, { "epoch": 4.626948389331486, "grad_norm": 0.5705049633979797, "learning_rate": 5.375173370319002e-06, "loss": 0.0147, "step": 6679 }, { "epoch": 4.627641149982681, "grad_norm": 0.4552869498729706, "learning_rate": 5.374479889042996e-06, "loss": 0.0137, "step": 6680 }, { "epoch": 4.628333910633876, "grad_norm": 0.5169498324394226, "learning_rate": 5.373786407766991e-06, "loss": 0.0173, "step": 6681 }, { "epoch": 4.629026671285071, "grad_norm": 0.5297955870628357, "learning_rate": 5.373092926490985e-06, "loss": 0.0164, "step": 6682 }, { "epoch": 4.629719431936266, "grad_norm": 0.49822568893432617, "learning_rate": 5.3723994452149795e-06, "loss": 0.0178, "step": 6683 }, { "epoch": 4.630412192587461, "grad_norm": 0.4892895817756653, "learning_rate": 5.3717059639389744e-06, "loss": 0.0148, "step": 6684 }, { "epoch": 4.631104953238656, "grad_norm": 0.737156093120575, "learning_rate": 5.3710124826629685e-06, "loss": 0.0169, "step": 6685 }, { "epoch": 4.631797713889851, "grad_norm": 0.5654307007789612, "learning_rate": 5.3703190013869635e-06, "loss": 0.0189, "step": 6686 }, { "epoch": 4.632490474541046, "grad_norm": 0.6652435660362244, "learning_rate": 5.369625520110957e-06, "loss": 0.0186, "step": 6687 }, { "epoch": 4.6331832351922415, "grad_norm": 0.4329829812049866, "learning_rate": 5.368932038834952e-06, "loss": 0.0142, "step": 6688 }, { "epoch": 4.633875995843436, "grad_norm": 0.47517314553260803, "learning_rate": 5.3682385575589466e-06, "loss": 0.0154, "step": 6689 }, { "epoch": 4.634568756494631, "grad_norm": 0.47700613737106323, "learning_rate": 5.367545076282941e-06, "loss": 0.0183, "step": 6690 }, { "epoch": 4.6352615171458265, "grad_norm": 0.4429062604904175, "learning_rate": 5.366851595006936e-06, "loss": 0.0173, "step": 6691 }, { "epoch": 4.635954277797021, "grad_norm": 0.5844759941101074, "learning_rate": 5.36615811373093e-06, "loss": 0.0218, "step": 6692 }, { "epoch": 4.636647038448216, "grad_norm": 0.5172418355941772, "learning_rate": 5.365464632454925e-06, "loss": 0.0184, "step": 6693 }, { "epoch": 4.637339799099411, "grad_norm": 0.4225928485393524, "learning_rate": 5.364771151178919e-06, "loss": 0.0138, "step": 6694 }, { "epoch": 4.638032559750606, "grad_norm": 0.5062415599822998, "learning_rate": 5.364077669902913e-06, "loss": 0.0202, "step": 6695 }, { "epoch": 4.638725320401801, "grad_norm": 0.44356057047843933, "learning_rate": 5.363384188626908e-06, "loss": 0.0165, "step": 6696 }, { "epoch": 4.639418081052996, "grad_norm": 0.4556675851345062, "learning_rate": 5.362690707350902e-06, "loss": 0.0165, "step": 6697 }, { "epoch": 4.640110841704191, "grad_norm": 0.4357486963272095, "learning_rate": 5.361997226074897e-06, "loss": 0.0109, "step": 6698 }, { "epoch": 4.640803602355386, "grad_norm": 0.44267845153808594, "learning_rate": 5.361303744798892e-06, "loss": 0.0135, "step": 6699 }, { "epoch": 4.641496363006581, "grad_norm": 0.4378957450389862, "learning_rate": 5.360610263522885e-06, "loss": 0.0123, "step": 6700 }, { "epoch": 4.642189123657777, "grad_norm": 0.4551459550857544, "learning_rate": 5.35991678224688e-06, "loss": 0.0154, "step": 6701 }, { "epoch": 4.642881884308971, "grad_norm": 0.5333576202392578, "learning_rate": 5.359223300970874e-06, "loss": 0.022, "step": 6702 }, { "epoch": 4.643574644960166, "grad_norm": 0.7531522512435913, "learning_rate": 5.358529819694869e-06, "loss": 0.0146, "step": 6703 }, { "epoch": 4.6442674056113615, "grad_norm": 0.5332921147346497, "learning_rate": 5.357836338418864e-06, "loss": 0.0136, "step": 6704 }, { "epoch": 4.644960166262556, "grad_norm": 0.4230230748653412, "learning_rate": 5.357142857142857e-06, "loss": 0.0157, "step": 6705 }, { "epoch": 4.645652926913751, "grad_norm": 0.42440685629844666, "learning_rate": 5.356449375866852e-06, "loss": 0.0125, "step": 6706 }, { "epoch": 4.6463456875649465, "grad_norm": 0.41778454184532166, "learning_rate": 5.355755894590846e-06, "loss": 0.0122, "step": 6707 }, { "epoch": 4.647038448216142, "grad_norm": 0.40710264444351196, "learning_rate": 5.355062413314841e-06, "loss": 0.0121, "step": 6708 }, { "epoch": 4.647731208867336, "grad_norm": 0.6806241869926453, "learning_rate": 5.354368932038836e-06, "loss": 0.0177, "step": 6709 }, { "epoch": 4.648423969518531, "grad_norm": 0.5046828985214233, "learning_rate": 5.35367545076283e-06, "loss": 0.0157, "step": 6710 }, { "epoch": 4.649116730169727, "grad_norm": 0.32613328099250793, "learning_rate": 5.352981969486825e-06, "loss": 0.0095, "step": 6711 }, { "epoch": 4.649809490820921, "grad_norm": 0.4319767653942108, "learning_rate": 5.352288488210818e-06, "loss": 0.0142, "step": 6712 }, { "epoch": 4.650502251472116, "grad_norm": 0.44726043939590454, "learning_rate": 5.351595006934813e-06, "loss": 0.0142, "step": 6713 }, { "epoch": 4.651195012123312, "grad_norm": 0.4683282673358917, "learning_rate": 5.350901525658808e-06, "loss": 0.0125, "step": 6714 }, { "epoch": 4.651887772774506, "grad_norm": 0.44113773107528687, "learning_rate": 5.350208044382802e-06, "loss": 0.013, "step": 6715 }, { "epoch": 4.652580533425701, "grad_norm": 0.849155604839325, "learning_rate": 5.349514563106797e-06, "loss": 0.0154, "step": 6716 }, { "epoch": 4.653273294076897, "grad_norm": 0.4086134135723114, "learning_rate": 5.34882108183079e-06, "loss": 0.0114, "step": 6717 }, { "epoch": 4.653966054728091, "grad_norm": 0.5375789999961853, "learning_rate": 5.348127600554785e-06, "loss": 0.0161, "step": 6718 }, { "epoch": 4.654658815379286, "grad_norm": 0.3824283480644226, "learning_rate": 5.34743411927878e-06, "loss": 0.0121, "step": 6719 }, { "epoch": 4.6553515760304816, "grad_norm": 0.5195133090019226, "learning_rate": 5.346740638002774e-06, "loss": 0.0163, "step": 6720 }, { "epoch": 4.656044336681677, "grad_norm": 0.35725370049476624, "learning_rate": 5.346047156726769e-06, "loss": 0.01, "step": 6721 }, { "epoch": 4.656737097332871, "grad_norm": 0.38447943329811096, "learning_rate": 5.345353675450763e-06, "loss": 0.0204, "step": 6722 }, { "epoch": 4.6574298579840665, "grad_norm": 0.4080303907394409, "learning_rate": 5.344660194174757e-06, "loss": 0.0146, "step": 6723 }, { "epoch": 4.658122618635262, "grad_norm": 0.5283615589141846, "learning_rate": 5.343966712898752e-06, "loss": 0.0131, "step": 6724 }, { "epoch": 4.658815379286456, "grad_norm": 0.5128315091133118, "learning_rate": 5.343273231622746e-06, "loss": 0.0154, "step": 6725 }, { "epoch": 4.6595081399376514, "grad_norm": 0.40859082341194153, "learning_rate": 5.342579750346741e-06, "loss": 0.0096, "step": 6726 }, { "epoch": 4.660200900588847, "grad_norm": 0.6382933855056763, "learning_rate": 5.341886269070735e-06, "loss": 0.0196, "step": 6727 }, { "epoch": 4.660893661240042, "grad_norm": 0.48991847038269043, "learning_rate": 5.34119278779473e-06, "loss": 0.0152, "step": 6728 }, { "epoch": 4.661586421891236, "grad_norm": 0.4030665457248688, "learning_rate": 5.340499306518725e-06, "loss": 0.0101, "step": 6729 }, { "epoch": 4.662279182542432, "grad_norm": 0.5373626947402954, "learning_rate": 5.3398058252427185e-06, "loss": 0.0159, "step": 6730 }, { "epoch": 4.662971943193627, "grad_norm": 0.5690442323684692, "learning_rate": 5.3391123439667134e-06, "loss": 0.0146, "step": 6731 }, { "epoch": 4.663664703844821, "grad_norm": 0.4243113696575165, "learning_rate": 5.3384188626907075e-06, "loss": 0.0132, "step": 6732 }, { "epoch": 4.664357464496017, "grad_norm": 0.39271458983421326, "learning_rate": 5.3377253814147025e-06, "loss": 0.0123, "step": 6733 }, { "epoch": 4.665050225147212, "grad_norm": 0.501668393611908, "learning_rate": 5.337031900138697e-06, "loss": 0.0144, "step": 6734 }, { "epoch": 4.665742985798406, "grad_norm": 0.44533124566078186, "learning_rate": 5.336338418862691e-06, "loss": 0.014, "step": 6735 }, { "epoch": 4.666435746449602, "grad_norm": 0.420138955116272, "learning_rate": 5.3356449375866856e-06, "loss": 0.0146, "step": 6736 }, { "epoch": 4.667128507100797, "grad_norm": 0.42193126678466797, "learning_rate": 5.33495145631068e-06, "loss": 0.0183, "step": 6737 }, { "epoch": 4.667821267751991, "grad_norm": 0.4445243775844574, "learning_rate": 5.334257975034675e-06, "loss": 0.0143, "step": 6738 }, { "epoch": 4.6685140284031865, "grad_norm": 0.44470784068107605, "learning_rate": 5.3335644937586695e-06, "loss": 0.013, "step": 6739 }, { "epoch": 4.669206789054382, "grad_norm": 0.643493115901947, "learning_rate": 5.332871012482664e-06, "loss": 0.0153, "step": 6740 }, { "epoch": 4.669899549705577, "grad_norm": 0.4524647295475006, "learning_rate": 5.3321775312066585e-06, "loss": 0.0125, "step": 6741 }, { "epoch": 4.6705923103567715, "grad_norm": 0.7606937885284424, "learning_rate": 5.331484049930652e-06, "loss": 0.0195, "step": 6742 }, { "epoch": 4.671285071007967, "grad_norm": 0.4757654368877411, "learning_rate": 5.330790568654647e-06, "loss": 0.0125, "step": 6743 }, { "epoch": 4.671977831659162, "grad_norm": 0.608229398727417, "learning_rate": 5.330097087378642e-06, "loss": 0.0181, "step": 6744 }, { "epoch": 4.672670592310356, "grad_norm": 0.3284655213356018, "learning_rate": 5.329403606102636e-06, "loss": 0.0102, "step": 6745 }, { "epoch": 4.673363352961552, "grad_norm": 0.34887823462486267, "learning_rate": 5.328710124826631e-06, "loss": 0.0111, "step": 6746 }, { "epoch": 4.674056113612747, "grad_norm": 0.5461851954460144, "learning_rate": 5.328016643550624e-06, "loss": 0.0172, "step": 6747 }, { "epoch": 4.674748874263942, "grad_norm": 0.38614967465400696, "learning_rate": 5.327323162274619e-06, "loss": 0.0136, "step": 6748 }, { "epoch": 4.675441634915137, "grad_norm": 0.3726162016391754, "learning_rate": 5.326629680998614e-06, "loss": 0.0112, "step": 6749 }, { "epoch": 4.676134395566332, "grad_norm": 0.4830145239830017, "learning_rate": 5.325936199722608e-06, "loss": 0.0167, "step": 6750 }, { "epoch": 4.676827156217527, "grad_norm": 0.39423564076423645, "learning_rate": 5.325242718446603e-06, "loss": 0.0128, "step": 6751 }, { "epoch": 4.677519916868722, "grad_norm": 0.4504843056201935, "learning_rate": 5.324549237170597e-06, "loss": 0.0159, "step": 6752 }, { "epoch": 4.678212677519917, "grad_norm": 0.5660965442657471, "learning_rate": 5.323855755894591e-06, "loss": 0.0133, "step": 6753 }, { "epoch": 4.678905438171112, "grad_norm": 0.48100876808166504, "learning_rate": 5.323162274618586e-06, "loss": 0.0183, "step": 6754 }, { "epoch": 4.6795981988223065, "grad_norm": 0.4504172205924988, "learning_rate": 5.32246879334258e-06, "loss": 0.018, "step": 6755 }, { "epoch": 4.680290959473502, "grad_norm": 0.3996303081512451, "learning_rate": 5.321775312066575e-06, "loss": 0.013, "step": 6756 }, { "epoch": 4.680983720124697, "grad_norm": 0.395956814289093, "learning_rate": 5.321081830790569e-06, "loss": 0.0128, "step": 6757 }, { "epoch": 4.6816764807758915, "grad_norm": 0.4500115215778351, "learning_rate": 5.320388349514564e-06, "loss": 0.0171, "step": 6758 }, { "epoch": 4.682369241427087, "grad_norm": 0.4182335138320923, "learning_rate": 5.319694868238559e-06, "loss": 0.0125, "step": 6759 }, { "epoch": 4.683062002078282, "grad_norm": 0.46114209294319153, "learning_rate": 5.319001386962552e-06, "loss": 0.0206, "step": 6760 }, { "epoch": 4.683754762729477, "grad_norm": 0.4365215003490448, "learning_rate": 5.318307905686547e-06, "loss": 0.0126, "step": 6761 }, { "epoch": 4.684447523380672, "grad_norm": 0.5210328102111816, "learning_rate": 5.317614424410541e-06, "loss": 0.0198, "step": 6762 }, { "epoch": 4.685140284031867, "grad_norm": 0.4795258045196533, "learning_rate": 5.316920943134536e-06, "loss": 0.0168, "step": 6763 }, { "epoch": 4.685833044683062, "grad_norm": 0.42756351828575134, "learning_rate": 5.316227461858531e-06, "loss": 0.0181, "step": 6764 }, { "epoch": 4.686525805334257, "grad_norm": 0.46755433082580566, "learning_rate": 5.315533980582524e-06, "loss": 0.0189, "step": 6765 }, { "epoch": 4.687218565985452, "grad_norm": 0.5363218188285828, "learning_rate": 5.314840499306519e-06, "loss": 0.0176, "step": 6766 }, { "epoch": 4.687911326636647, "grad_norm": 0.44285398721694946, "learning_rate": 5.314147018030513e-06, "loss": 0.0126, "step": 6767 }, { "epoch": 4.6886040872878425, "grad_norm": 0.47270604968070984, "learning_rate": 5.313453536754508e-06, "loss": 0.0136, "step": 6768 }, { "epoch": 4.689296847939037, "grad_norm": 0.5872167348861694, "learning_rate": 5.312760055478503e-06, "loss": 0.0202, "step": 6769 }, { "epoch": 4.689989608590232, "grad_norm": 0.49353814125061035, "learning_rate": 5.312066574202497e-06, "loss": 0.0219, "step": 6770 }, { "epoch": 4.6906823692414275, "grad_norm": 0.5511910915374756, "learning_rate": 5.311373092926492e-06, "loss": 0.0131, "step": 6771 }, { "epoch": 4.691375129892622, "grad_norm": 0.4286562204360962, "learning_rate": 5.310679611650485e-06, "loss": 0.0136, "step": 6772 }, { "epoch": 4.692067890543817, "grad_norm": 0.41244441270828247, "learning_rate": 5.30998613037448e-06, "loss": 0.012, "step": 6773 }, { "epoch": 4.692760651195012, "grad_norm": 0.5870745778083801, "learning_rate": 5.309292649098475e-06, "loss": 0.0152, "step": 6774 }, { "epoch": 4.693453411846207, "grad_norm": 0.36597204208374023, "learning_rate": 5.308599167822469e-06, "loss": 0.0117, "step": 6775 }, { "epoch": 4.694146172497402, "grad_norm": 0.5219966173171997, "learning_rate": 5.307905686546464e-06, "loss": 0.0189, "step": 6776 }, { "epoch": 4.694838933148597, "grad_norm": 0.5244627594947815, "learning_rate": 5.3072122052704575e-06, "loss": 0.0173, "step": 6777 }, { "epoch": 4.695531693799792, "grad_norm": 0.6822748184204102, "learning_rate": 5.3065187239944524e-06, "loss": 0.0159, "step": 6778 }, { "epoch": 4.696224454450987, "grad_norm": 0.5595081448554993, "learning_rate": 5.305825242718447e-06, "loss": 0.0216, "step": 6779 }, { "epoch": 4.696917215102182, "grad_norm": 0.49543243646621704, "learning_rate": 5.3051317614424415e-06, "loss": 0.0214, "step": 6780 }, { "epoch": 4.697609975753378, "grad_norm": 0.5642449855804443, "learning_rate": 5.304438280166436e-06, "loss": 0.0122, "step": 6781 }, { "epoch": 4.698302736404572, "grad_norm": 0.4676438271999359, "learning_rate": 5.30374479889043e-06, "loss": 0.013, "step": 6782 }, { "epoch": 4.698995497055767, "grad_norm": 0.5166946053504944, "learning_rate": 5.3030513176144246e-06, "loss": 0.0162, "step": 6783 }, { "epoch": 4.6996882577069625, "grad_norm": 0.43936529755592346, "learning_rate": 5.3023578363384195e-06, "loss": 0.0141, "step": 6784 }, { "epoch": 4.700381018358157, "grad_norm": 0.5059449076652527, "learning_rate": 5.301664355062414e-06, "loss": 0.0144, "step": 6785 }, { "epoch": 4.701073779009352, "grad_norm": 0.42727038264274597, "learning_rate": 5.3009708737864085e-06, "loss": 0.0112, "step": 6786 }, { "epoch": 4.7017665396605475, "grad_norm": 0.5576202869415283, "learning_rate": 5.300277392510403e-06, "loss": 0.0233, "step": 6787 }, { "epoch": 4.702459300311743, "grad_norm": 0.4915574789047241, "learning_rate": 5.2995839112343975e-06, "loss": 0.016, "step": 6788 }, { "epoch": 4.703152060962937, "grad_norm": 0.41665318608283997, "learning_rate": 5.2988904299583925e-06, "loss": 0.0123, "step": 6789 }, { "epoch": 4.703844821614132, "grad_norm": 0.42135944962501526, "learning_rate": 5.298196948682386e-06, "loss": 0.0128, "step": 6790 }, { "epoch": 4.704537582265328, "grad_norm": 0.5983707308769226, "learning_rate": 5.297503467406381e-06, "loss": 0.0164, "step": 6791 }, { "epoch": 4.705230342916522, "grad_norm": 0.4632401466369629, "learning_rate": 5.296809986130375e-06, "loss": 0.0145, "step": 6792 }, { "epoch": 4.705923103567717, "grad_norm": 0.4878617823123932, "learning_rate": 5.29611650485437e-06, "loss": 0.0162, "step": 6793 }, { "epoch": 4.706615864218913, "grad_norm": 0.4483562409877777, "learning_rate": 5.295423023578365e-06, "loss": 0.0195, "step": 6794 }, { "epoch": 4.707308624870107, "grad_norm": 0.462507963180542, "learning_rate": 5.294729542302358e-06, "loss": 0.0168, "step": 6795 }, { "epoch": 4.708001385521302, "grad_norm": 0.3771485388278961, "learning_rate": 5.294036061026353e-06, "loss": 0.011, "step": 6796 }, { "epoch": 4.708694146172498, "grad_norm": 0.43506211042404175, "learning_rate": 5.293342579750347e-06, "loss": 0.0148, "step": 6797 }, { "epoch": 4.709386906823692, "grad_norm": 0.6274265646934509, "learning_rate": 5.292649098474342e-06, "loss": 0.0135, "step": 6798 }, { "epoch": 4.710079667474887, "grad_norm": 0.5505167841911316, "learning_rate": 5.291955617198337e-06, "loss": 0.0129, "step": 6799 }, { "epoch": 4.7107724281260825, "grad_norm": 0.44486531615257263, "learning_rate": 5.291262135922331e-06, "loss": 0.0164, "step": 6800 }, { "epoch": 4.711465188777278, "grad_norm": 0.6184029579162598, "learning_rate": 5.290568654646325e-06, "loss": 0.024, "step": 6801 }, { "epoch": 4.712157949428472, "grad_norm": 0.5388383269309998, "learning_rate": 5.289875173370319e-06, "loss": 0.0181, "step": 6802 }, { "epoch": 4.7128507100796675, "grad_norm": 0.43575114011764526, "learning_rate": 5.289181692094314e-06, "loss": 0.0142, "step": 6803 }, { "epoch": 4.713543470730863, "grad_norm": 0.5159897208213806, "learning_rate": 5.288488210818309e-06, "loss": 0.017, "step": 6804 }, { "epoch": 4.714236231382057, "grad_norm": 0.5509412288665771, "learning_rate": 5.287794729542303e-06, "loss": 0.0199, "step": 6805 }, { "epoch": 4.714928992033252, "grad_norm": 0.5837615132331848, "learning_rate": 5.287101248266298e-06, "loss": 0.0249, "step": 6806 }, { "epoch": 4.715621752684448, "grad_norm": 0.42776304483413696, "learning_rate": 5.286407766990291e-06, "loss": 0.0133, "step": 6807 }, { "epoch": 4.716314513335643, "grad_norm": 0.4792737066745758, "learning_rate": 5.285714285714286e-06, "loss": 0.0165, "step": 6808 }, { "epoch": 4.717007273986837, "grad_norm": 0.3855811059474945, "learning_rate": 5.285020804438281e-06, "loss": 0.0126, "step": 6809 }, { "epoch": 4.717700034638033, "grad_norm": 0.44185054302215576, "learning_rate": 5.284327323162275e-06, "loss": 0.0128, "step": 6810 }, { "epoch": 4.718392795289228, "grad_norm": 0.5195660591125488, "learning_rate": 5.28363384188627e-06, "loss": 0.014, "step": 6811 }, { "epoch": 4.719085555940422, "grad_norm": 0.4914865493774414, "learning_rate": 5.282940360610263e-06, "loss": 0.0136, "step": 6812 }, { "epoch": 4.719778316591618, "grad_norm": 0.46978315711021423, "learning_rate": 5.282246879334258e-06, "loss": 0.0139, "step": 6813 }, { "epoch": 4.720471077242813, "grad_norm": 0.509129524230957, "learning_rate": 5.281553398058253e-06, "loss": 0.0184, "step": 6814 }, { "epoch": 4.721163837894007, "grad_norm": 0.4341345429420471, "learning_rate": 5.280859916782247e-06, "loss": 0.0151, "step": 6815 }, { "epoch": 4.721856598545203, "grad_norm": 0.5684191584587097, "learning_rate": 5.280166435506242e-06, "loss": 0.0202, "step": 6816 }, { "epoch": 4.722549359196398, "grad_norm": 0.4436483383178711, "learning_rate": 5.279472954230236e-06, "loss": 0.0169, "step": 6817 }, { "epoch": 4.723242119847592, "grad_norm": 0.6761987805366516, "learning_rate": 5.278779472954231e-06, "loss": 0.0189, "step": 6818 }, { "epoch": 4.7239348804987875, "grad_norm": 0.6232472658157349, "learning_rate": 5.278085991678226e-06, "loss": 0.0204, "step": 6819 }, { "epoch": 4.724627641149983, "grad_norm": 0.6841349005699158, "learning_rate": 5.277392510402219e-06, "loss": 0.0199, "step": 6820 }, { "epoch": 4.725320401801178, "grad_norm": 0.4502756893634796, "learning_rate": 5.276699029126214e-06, "loss": 0.0174, "step": 6821 }, { "epoch": 4.7260131624523725, "grad_norm": 0.460126668214798, "learning_rate": 5.276005547850208e-06, "loss": 0.0133, "step": 6822 }, { "epoch": 4.726705923103568, "grad_norm": 0.8593262434005737, "learning_rate": 5.275312066574203e-06, "loss": 0.0186, "step": 6823 }, { "epoch": 4.727398683754763, "grad_norm": 0.4822736978530884, "learning_rate": 5.274618585298198e-06, "loss": 0.016, "step": 6824 }, { "epoch": 4.728091444405957, "grad_norm": 0.5434883832931519, "learning_rate": 5.2739251040221914e-06, "loss": 0.0179, "step": 6825 }, { "epoch": 4.728784205057153, "grad_norm": 0.4977904260158539, "learning_rate": 5.273231622746186e-06, "loss": 0.0145, "step": 6826 }, { "epoch": 4.729476965708348, "grad_norm": 0.5161583423614502, "learning_rate": 5.2725381414701805e-06, "loss": 0.0155, "step": 6827 }, { "epoch": 4.730169726359543, "grad_norm": 0.44691187143325806, "learning_rate": 5.271844660194175e-06, "loss": 0.0215, "step": 6828 }, { "epoch": 4.730862487010738, "grad_norm": 0.40459316968917847, "learning_rate": 5.27115117891817e-06, "loss": 0.0137, "step": 6829 }, { "epoch": 4.731555247661933, "grad_norm": 0.5111734867095947, "learning_rate": 5.2704576976421636e-06, "loss": 0.0155, "step": 6830 }, { "epoch": 4.732248008313128, "grad_norm": 0.424892783164978, "learning_rate": 5.2697642163661585e-06, "loss": 0.0149, "step": 6831 }, { "epoch": 4.732940768964323, "grad_norm": 0.4103415012359619, "learning_rate": 5.269070735090153e-06, "loss": 0.0159, "step": 6832 }, { "epoch": 4.733633529615518, "grad_norm": 0.5258544683456421, "learning_rate": 5.2683772538141475e-06, "loss": 0.0198, "step": 6833 }, { "epoch": 4.734326290266713, "grad_norm": 0.4566044807434082, "learning_rate": 5.2676837725381425e-06, "loss": 0.0131, "step": 6834 }, { "epoch": 4.7350190509179075, "grad_norm": 0.4631848931312561, "learning_rate": 5.2669902912621365e-06, "loss": 0.017, "step": 6835 }, { "epoch": 4.735711811569103, "grad_norm": 0.4678950011730194, "learning_rate": 5.2662968099861315e-06, "loss": 0.0181, "step": 6836 }, { "epoch": 4.736404572220298, "grad_norm": 0.5638415217399597, "learning_rate": 5.265603328710125e-06, "loss": 0.0173, "step": 6837 }, { "epoch": 4.7370973328714925, "grad_norm": 0.49170637130737305, "learning_rate": 5.26490984743412e-06, "loss": 0.0205, "step": 6838 }, { "epoch": 4.737790093522688, "grad_norm": 0.6220834851264954, "learning_rate": 5.264216366158115e-06, "loss": 0.0296, "step": 6839 }, { "epoch": 4.738482854173883, "grad_norm": 0.36823028326034546, "learning_rate": 5.263522884882109e-06, "loss": 0.0127, "step": 6840 }, { "epoch": 4.739175614825078, "grad_norm": 0.4995688796043396, "learning_rate": 5.262829403606104e-06, "loss": 0.013, "step": 6841 }, { "epoch": 4.739868375476273, "grad_norm": 0.33165818452835083, "learning_rate": 5.262135922330097e-06, "loss": 0.0118, "step": 6842 }, { "epoch": 4.740561136127468, "grad_norm": 0.6700669527053833, "learning_rate": 5.261442441054092e-06, "loss": 0.0161, "step": 6843 }, { "epoch": 4.741253896778663, "grad_norm": 0.49722665548324585, "learning_rate": 5.260748959778087e-06, "loss": 0.0206, "step": 6844 }, { "epoch": 4.741946657429858, "grad_norm": 0.41947928071022034, "learning_rate": 5.260055478502081e-06, "loss": 0.0148, "step": 6845 }, { "epoch": 4.742639418081053, "grad_norm": 0.37966054677963257, "learning_rate": 5.259361997226076e-06, "loss": 0.01, "step": 6846 }, { "epoch": 4.743332178732248, "grad_norm": 0.4666665494441986, "learning_rate": 5.25866851595007e-06, "loss": 0.0187, "step": 6847 }, { "epoch": 4.7440249393834435, "grad_norm": 0.36083927750587463, "learning_rate": 5.257975034674065e-06, "loss": 0.013, "step": 6848 }, { "epoch": 4.744717700034638, "grad_norm": 0.5228288769721985, "learning_rate": 5.257281553398059e-06, "loss": 0.0176, "step": 6849 }, { "epoch": 4.745410460685833, "grad_norm": 0.4528660476207733, "learning_rate": 5.256588072122053e-06, "loss": 0.0159, "step": 6850 }, { "epoch": 4.746103221337028, "grad_norm": 0.5029258131980896, "learning_rate": 5.255894590846048e-06, "loss": 0.0217, "step": 6851 }, { "epoch": 4.746795981988223, "grad_norm": 0.4450753629207611, "learning_rate": 5.255201109570042e-06, "loss": 0.0106, "step": 6852 }, { "epoch": 4.747488742639418, "grad_norm": 0.657798707485199, "learning_rate": 5.254507628294037e-06, "loss": 0.0136, "step": 6853 }, { "epoch": 4.748181503290613, "grad_norm": 0.5315065383911133, "learning_rate": 5.253814147018032e-06, "loss": 0.0134, "step": 6854 }, { "epoch": 4.748874263941808, "grad_norm": 0.5781354308128357, "learning_rate": 5.253120665742025e-06, "loss": 0.0173, "step": 6855 }, { "epoch": 4.749567024593003, "grad_norm": 0.42641496658325195, "learning_rate": 5.25242718446602e-06, "loss": 0.0167, "step": 6856 }, { "epoch": 4.750259785244198, "grad_norm": 0.7987641096115112, "learning_rate": 5.251733703190014e-06, "loss": 0.0242, "step": 6857 }, { "epoch": 4.750952545895393, "grad_norm": 0.5275469422340393, "learning_rate": 5.251040221914009e-06, "loss": 0.013, "step": 6858 }, { "epoch": 4.751645306546588, "grad_norm": 0.4702892601490021, "learning_rate": 5.250346740638004e-06, "loss": 0.0181, "step": 6859 }, { "epoch": 4.752338067197783, "grad_norm": 0.5504516363143921, "learning_rate": 5.249653259361997e-06, "loss": 0.015, "step": 6860 }, { "epoch": 4.753030827848978, "grad_norm": 0.567467451095581, "learning_rate": 5.248959778085992e-06, "loss": 0.0199, "step": 6861 }, { "epoch": 4.753723588500173, "grad_norm": 0.9012731313705444, "learning_rate": 5.248266296809986e-06, "loss": 0.0158, "step": 6862 }, { "epoch": 4.754416349151368, "grad_norm": 0.48847752809524536, "learning_rate": 5.247572815533981e-06, "loss": 0.021, "step": 6863 }, { "epoch": 4.7551091098025635, "grad_norm": 0.5339195728302002, "learning_rate": 5.246879334257975e-06, "loss": 0.0173, "step": 6864 }, { "epoch": 4.755801870453758, "grad_norm": 0.5476733446121216, "learning_rate": 5.24618585298197e-06, "loss": 0.0192, "step": 6865 }, { "epoch": 4.756494631104953, "grad_norm": 0.46313193440437317, "learning_rate": 5.245492371705965e-06, "loss": 0.0139, "step": 6866 }, { "epoch": 4.7571873917561485, "grad_norm": 0.4554114043712616, "learning_rate": 5.244798890429958e-06, "loss": 0.0115, "step": 6867 }, { "epoch": 4.757880152407344, "grad_norm": 0.43525463342666626, "learning_rate": 5.244105409153953e-06, "loss": 0.0115, "step": 6868 }, { "epoch": 4.758572913058538, "grad_norm": 0.5824906229972839, "learning_rate": 5.243411927877947e-06, "loss": 0.0157, "step": 6869 }, { "epoch": 4.759265673709733, "grad_norm": 0.3750251531600952, "learning_rate": 5.242718446601942e-06, "loss": 0.0133, "step": 6870 }, { "epoch": 4.759958434360929, "grad_norm": 0.5041017532348633, "learning_rate": 5.242024965325937e-06, "loss": 0.0196, "step": 6871 }, { "epoch": 4.760651195012123, "grad_norm": 0.6041717529296875, "learning_rate": 5.2413314840499304e-06, "loss": 0.0216, "step": 6872 }, { "epoch": 4.761343955663318, "grad_norm": 0.4168316423892975, "learning_rate": 5.240638002773925e-06, "loss": 0.0128, "step": 6873 }, { "epoch": 4.762036716314514, "grad_norm": 0.5820544958114624, "learning_rate": 5.2399445214979195e-06, "loss": 0.0156, "step": 6874 }, { "epoch": 4.762729476965708, "grad_norm": 0.6230272054672241, "learning_rate": 5.239251040221914e-06, "loss": 0.0181, "step": 6875 }, { "epoch": 4.763422237616903, "grad_norm": 0.3964560031890869, "learning_rate": 5.238557558945909e-06, "loss": 0.0165, "step": 6876 }, { "epoch": 4.764114998268099, "grad_norm": 0.4693322777748108, "learning_rate": 5.237864077669903e-06, "loss": 0.0143, "step": 6877 }, { "epoch": 4.764807758919293, "grad_norm": 0.5114544630050659, "learning_rate": 5.2371705963938975e-06, "loss": 0.0161, "step": 6878 }, { "epoch": 4.765500519570488, "grad_norm": 0.443996399641037, "learning_rate": 5.236477115117892e-06, "loss": 0.011, "step": 6879 }, { "epoch": 4.7661932802216835, "grad_norm": 0.5216764807701111, "learning_rate": 5.2357836338418865e-06, "loss": 0.0161, "step": 6880 }, { "epoch": 4.766886040872878, "grad_norm": 0.5465784072875977, "learning_rate": 5.2350901525658815e-06, "loss": 0.021, "step": 6881 }, { "epoch": 4.767578801524073, "grad_norm": 0.45073312520980835, "learning_rate": 5.2343966712898755e-06, "loss": 0.0155, "step": 6882 }, { "epoch": 4.7682715621752685, "grad_norm": 0.4280548393726349, "learning_rate": 5.2337031900138705e-06, "loss": 0.0145, "step": 6883 }, { "epoch": 4.768964322826464, "grad_norm": 0.5377956628799438, "learning_rate": 5.233009708737864e-06, "loss": 0.0135, "step": 6884 }, { "epoch": 4.769657083477658, "grad_norm": 0.5947824716567993, "learning_rate": 5.232316227461859e-06, "loss": 0.0206, "step": 6885 }, { "epoch": 4.770349844128853, "grad_norm": 0.8097753524780273, "learning_rate": 5.231622746185854e-06, "loss": 0.0185, "step": 6886 }, { "epoch": 4.771042604780049, "grad_norm": 0.3802036941051483, "learning_rate": 5.230929264909848e-06, "loss": 0.0115, "step": 6887 }, { "epoch": 4.771735365431244, "grad_norm": 0.41896852850914, "learning_rate": 5.230235783633843e-06, "loss": 0.0131, "step": 6888 }, { "epoch": 4.772428126082438, "grad_norm": 0.5791265964508057, "learning_rate": 5.229542302357836e-06, "loss": 0.0155, "step": 6889 }, { "epoch": 4.773120886733634, "grad_norm": 0.5431563854217529, "learning_rate": 5.228848821081831e-06, "loss": 0.0184, "step": 6890 }, { "epoch": 4.773813647384829, "grad_norm": 0.4802630841732025, "learning_rate": 5.228155339805826e-06, "loss": 0.0169, "step": 6891 }, { "epoch": 4.774506408036023, "grad_norm": 0.42098310589790344, "learning_rate": 5.22746185852982e-06, "loss": 0.0128, "step": 6892 }, { "epoch": 4.775199168687219, "grad_norm": 0.4361574351787567, "learning_rate": 5.226768377253815e-06, "loss": 0.012, "step": 6893 }, { "epoch": 4.775891929338414, "grad_norm": 0.6751755475997925, "learning_rate": 5.226074895977809e-06, "loss": 0.0217, "step": 6894 }, { "epoch": 4.776584689989608, "grad_norm": 0.5671015381813049, "learning_rate": 5.225381414701804e-06, "loss": 0.0179, "step": 6895 }, { "epoch": 4.777277450640804, "grad_norm": 0.5791196823120117, "learning_rate": 5.224687933425799e-06, "loss": 0.019, "step": 6896 }, { "epoch": 4.777970211291999, "grad_norm": 0.483121782541275, "learning_rate": 5.223994452149792e-06, "loss": 0.0154, "step": 6897 }, { "epoch": 4.778662971943193, "grad_norm": 0.48173636198043823, "learning_rate": 5.223300970873787e-06, "loss": 0.0197, "step": 6898 }, { "epoch": 4.7793557325943885, "grad_norm": 0.6470112800598145, "learning_rate": 5.222607489597781e-06, "loss": 0.0157, "step": 6899 }, { "epoch": 4.780048493245584, "grad_norm": 0.4605175256729126, "learning_rate": 5.221914008321776e-06, "loss": 0.0181, "step": 6900 }, { "epoch": 4.780741253896778, "grad_norm": 0.4896812438964844, "learning_rate": 5.221220527045771e-06, "loss": 0.0179, "step": 6901 }, { "epoch": 4.7814340145479735, "grad_norm": 0.6870664358139038, "learning_rate": 5.220527045769764e-06, "loss": 0.0221, "step": 6902 }, { "epoch": 4.782126775199169, "grad_norm": 0.47149863839149475, "learning_rate": 5.219833564493759e-06, "loss": 0.0147, "step": 6903 }, { "epoch": 4.782819535850364, "grad_norm": 0.5091636776924133, "learning_rate": 5.219140083217753e-06, "loss": 0.018, "step": 6904 }, { "epoch": 4.783512296501558, "grad_norm": 0.4500809907913208, "learning_rate": 5.218446601941748e-06, "loss": 0.0129, "step": 6905 }, { "epoch": 4.784205057152754, "grad_norm": 0.4145751893520355, "learning_rate": 5.217753120665743e-06, "loss": 0.0162, "step": 6906 }, { "epoch": 4.784897817803949, "grad_norm": 0.594142496585846, "learning_rate": 5.217059639389736e-06, "loss": 0.011, "step": 6907 }, { "epoch": 4.785590578455144, "grad_norm": 0.7651152014732361, "learning_rate": 5.216366158113731e-06, "loss": 0.0222, "step": 6908 }, { "epoch": 4.786283339106339, "grad_norm": 0.5690034031867981, "learning_rate": 5.215672676837725e-06, "loss": 0.018, "step": 6909 }, { "epoch": 4.786976099757534, "grad_norm": 0.5289434790611267, "learning_rate": 5.21497919556172e-06, "loss": 0.0154, "step": 6910 }, { "epoch": 4.787668860408729, "grad_norm": 0.4769068658351898, "learning_rate": 5.214285714285715e-06, "loss": 0.0142, "step": 6911 }, { "epoch": 4.788361621059924, "grad_norm": 0.5074408054351807, "learning_rate": 5.213592233009709e-06, "loss": 0.0188, "step": 6912 }, { "epoch": 4.789054381711119, "grad_norm": 0.536114513874054, "learning_rate": 5.212898751733704e-06, "loss": 0.0205, "step": 6913 }, { "epoch": 4.789747142362314, "grad_norm": 0.4882396459579468, "learning_rate": 5.212205270457697e-06, "loss": 0.0151, "step": 6914 }, { "epoch": 4.7904399030135085, "grad_norm": 0.5209609866142273, "learning_rate": 5.211511789181692e-06, "loss": 0.0219, "step": 6915 }, { "epoch": 4.791132663664704, "grad_norm": 0.577211856842041, "learning_rate": 5.210818307905687e-06, "loss": 0.0189, "step": 6916 }, { "epoch": 4.791825424315899, "grad_norm": 0.8125247359275818, "learning_rate": 5.210124826629681e-06, "loss": 0.015, "step": 6917 }, { "epoch": 4.7925181849670935, "grad_norm": 0.662858247756958, "learning_rate": 5.209431345353676e-06, "loss": 0.0147, "step": 6918 }, { "epoch": 4.793210945618289, "grad_norm": 0.37163200974464417, "learning_rate": 5.2087378640776694e-06, "loss": 0.0126, "step": 6919 }, { "epoch": 4.793903706269484, "grad_norm": 0.49915891885757446, "learning_rate": 5.208044382801664e-06, "loss": 0.0135, "step": 6920 }, { "epoch": 4.794596466920678, "grad_norm": 0.44924038648605347, "learning_rate": 5.207350901525659e-06, "loss": 0.0153, "step": 6921 }, { "epoch": 4.795289227571874, "grad_norm": 0.42528730630874634, "learning_rate": 5.206657420249653e-06, "loss": 0.0144, "step": 6922 }, { "epoch": 4.795981988223069, "grad_norm": 0.5404415130615234, "learning_rate": 5.205963938973648e-06, "loss": 0.0193, "step": 6923 }, { "epoch": 4.796674748874264, "grad_norm": 0.4966762661933899, "learning_rate": 5.205270457697642e-06, "loss": 0.0155, "step": 6924 }, { "epoch": 4.797367509525459, "grad_norm": 0.5383783578872681, "learning_rate": 5.204576976421637e-06, "loss": 0.02, "step": 6925 }, { "epoch": 4.798060270176654, "grad_norm": 0.4352760910987854, "learning_rate": 5.2038834951456314e-06, "loss": 0.0146, "step": 6926 }, { "epoch": 4.798753030827849, "grad_norm": 0.5450789928436279, "learning_rate": 5.2031900138696255e-06, "loss": 0.0191, "step": 6927 }, { "epoch": 4.7994457914790445, "grad_norm": 0.601810097694397, "learning_rate": 5.2024965325936205e-06, "loss": 0.0205, "step": 6928 }, { "epoch": 4.800138552130239, "grad_norm": 0.6019473075866699, "learning_rate": 5.2018030513176145e-06, "loss": 0.0149, "step": 6929 }, { "epoch": 4.800831312781434, "grad_norm": 0.43512487411499023, "learning_rate": 5.2011095700416095e-06, "loss": 0.0159, "step": 6930 }, { "epoch": 4.801524073432629, "grad_norm": 0.5419918894767761, "learning_rate": 5.200416088765604e-06, "loss": 0.017, "step": 6931 }, { "epoch": 4.802216834083824, "grad_norm": 0.5038084387779236, "learning_rate": 5.199722607489598e-06, "loss": 0.0173, "step": 6932 }, { "epoch": 4.802909594735019, "grad_norm": 0.5100458264350891, "learning_rate": 5.199029126213593e-06, "loss": 0.0205, "step": 6933 }, { "epoch": 4.803602355386214, "grad_norm": 0.47111448645591736, "learning_rate": 5.198335644937587e-06, "loss": 0.0174, "step": 6934 }, { "epoch": 4.804295116037409, "grad_norm": 0.46273452043533325, "learning_rate": 5.197642163661582e-06, "loss": 0.0153, "step": 6935 }, { "epoch": 4.804987876688604, "grad_norm": 0.5884197950363159, "learning_rate": 5.1969486823855765e-06, "loss": 0.0225, "step": 6936 }, { "epoch": 4.805680637339799, "grad_norm": 0.5098763704299927, "learning_rate": 5.19625520110957e-06, "loss": 0.0145, "step": 6937 }, { "epoch": 4.806373397990994, "grad_norm": 0.500295877456665, "learning_rate": 5.195561719833565e-06, "loss": 0.0211, "step": 6938 }, { "epoch": 4.807066158642189, "grad_norm": 0.505456268787384, "learning_rate": 5.194868238557559e-06, "loss": 0.0145, "step": 6939 }, { "epoch": 4.807758919293384, "grad_norm": 0.44865840673446655, "learning_rate": 5.194174757281554e-06, "loss": 0.0151, "step": 6940 }, { "epoch": 4.808451679944579, "grad_norm": 0.4600048065185547, "learning_rate": 5.193481276005549e-06, "loss": 0.0133, "step": 6941 }, { "epoch": 4.809144440595774, "grad_norm": 0.764250636100769, "learning_rate": 5.192787794729543e-06, "loss": 0.0129, "step": 6942 }, { "epoch": 4.809837201246969, "grad_norm": 0.4583585560321808, "learning_rate": 5.192094313453538e-06, "loss": 0.0131, "step": 6943 }, { "epoch": 4.8105299618981645, "grad_norm": 0.4157699644565582, "learning_rate": 5.191400832177531e-06, "loss": 0.0114, "step": 6944 }, { "epoch": 4.811222722549359, "grad_norm": 0.62359219789505, "learning_rate": 5.190707350901526e-06, "loss": 0.018, "step": 6945 }, { "epoch": 4.811915483200554, "grad_norm": 0.37869542837142944, "learning_rate": 5.190013869625521e-06, "loss": 0.0141, "step": 6946 }, { "epoch": 4.8126082438517495, "grad_norm": 0.42418041825294495, "learning_rate": 5.189320388349515e-06, "loss": 0.0133, "step": 6947 }, { "epoch": 4.813301004502944, "grad_norm": 0.4623964726924896, "learning_rate": 5.18862690707351e-06, "loss": 0.0139, "step": 6948 }, { "epoch": 4.813993765154139, "grad_norm": 0.5853286385536194, "learning_rate": 5.187933425797503e-06, "loss": 0.0159, "step": 6949 }, { "epoch": 4.814686525805334, "grad_norm": 0.5568832159042358, "learning_rate": 5.187239944521498e-06, "loss": 0.0184, "step": 6950 }, { "epoch": 4.81537928645653, "grad_norm": 0.48901286721229553, "learning_rate": 5.186546463245493e-06, "loss": 0.0193, "step": 6951 }, { "epoch": 4.816072047107724, "grad_norm": 0.4346074163913727, "learning_rate": 5.185852981969487e-06, "loss": 0.012, "step": 6952 }, { "epoch": 4.816764807758919, "grad_norm": 0.6015490293502808, "learning_rate": 5.185159500693482e-06, "loss": 0.0261, "step": 6953 }, { "epoch": 4.817457568410115, "grad_norm": 0.46684080362319946, "learning_rate": 5.184466019417476e-06, "loss": 0.0147, "step": 6954 }, { "epoch": 4.818150329061309, "grad_norm": 0.5361834764480591, "learning_rate": 5.18377253814147e-06, "loss": 0.0208, "step": 6955 }, { "epoch": 4.818843089712504, "grad_norm": 0.4664507210254669, "learning_rate": 5.183079056865465e-06, "loss": 0.0181, "step": 6956 }, { "epoch": 4.8195358503637, "grad_norm": 0.5465168356895447, "learning_rate": 5.182385575589459e-06, "loss": 0.0181, "step": 6957 }, { "epoch": 4.820228611014894, "grad_norm": 0.3864993155002594, "learning_rate": 5.181692094313454e-06, "loss": 0.0111, "step": 6958 }, { "epoch": 4.820921371666089, "grad_norm": 0.4204265773296356, "learning_rate": 5.180998613037448e-06, "loss": 0.013, "step": 6959 }, { "epoch": 4.8216141323172845, "grad_norm": 0.6744337677955627, "learning_rate": 5.180305131761443e-06, "loss": 0.0253, "step": 6960 }, { "epoch": 4.822306892968479, "grad_norm": 0.3722931146621704, "learning_rate": 5.179611650485438e-06, "loss": 0.0125, "step": 6961 }, { "epoch": 4.822999653619674, "grad_norm": 0.6089905500411987, "learning_rate": 5.178918169209431e-06, "loss": 0.0189, "step": 6962 }, { "epoch": 4.8236924142708695, "grad_norm": 0.5528891086578369, "learning_rate": 5.178224687933426e-06, "loss": 0.0186, "step": 6963 }, { "epoch": 4.824385174922065, "grad_norm": 0.4733254909515381, "learning_rate": 5.17753120665742e-06, "loss": 0.0139, "step": 6964 }, { "epoch": 4.825077935573259, "grad_norm": 0.38616856932640076, "learning_rate": 5.176837725381415e-06, "loss": 0.0118, "step": 6965 }, { "epoch": 4.825770696224454, "grad_norm": 0.4355665445327759, "learning_rate": 5.17614424410541e-06, "loss": 0.0178, "step": 6966 }, { "epoch": 4.82646345687565, "grad_norm": 0.5209665894508362, "learning_rate": 5.175450762829403e-06, "loss": 0.014, "step": 6967 }, { "epoch": 4.827156217526844, "grad_norm": 0.996882438659668, "learning_rate": 5.174757281553398e-06, "loss": 0.0213, "step": 6968 }, { "epoch": 4.827848978178039, "grad_norm": 0.5259845852851868, "learning_rate": 5.174063800277392e-06, "loss": 0.0284, "step": 6969 }, { "epoch": 4.828541738829235, "grad_norm": 0.49833589792251587, "learning_rate": 5.173370319001387e-06, "loss": 0.0218, "step": 6970 }, { "epoch": 4.82923449948043, "grad_norm": 0.4214405119419098, "learning_rate": 5.172676837725382e-06, "loss": 0.0121, "step": 6971 }, { "epoch": 4.829927260131624, "grad_norm": 0.571959376335144, "learning_rate": 5.171983356449376e-06, "loss": 0.0165, "step": 6972 }, { "epoch": 4.83062002078282, "grad_norm": 0.5179031491279602, "learning_rate": 5.171289875173371e-06, "loss": 0.0218, "step": 6973 }, { "epoch": 4.831312781434015, "grad_norm": 0.43725210428237915, "learning_rate": 5.1705963938973645e-06, "loss": 0.0135, "step": 6974 }, { "epoch": 4.832005542085209, "grad_norm": 0.40078112483024597, "learning_rate": 5.1699029126213595e-06, "loss": 0.0111, "step": 6975 }, { "epoch": 4.8326983027364045, "grad_norm": 0.4938311278820038, "learning_rate": 5.169209431345354e-06, "loss": 0.0126, "step": 6976 }, { "epoch": 4.8333910633876, "grad_norm": 0.7092520594596863, "learning_rate": 5.1685159500693485e-06, "loss": 0.0227, "step": 6977 }, { "epoch": 4.834083824038794, "grad_norm": 0.5861003398895264, "learning_rate": 5.167822468793343e-06, "loss": 0.0172, "step": 6978 }, { "epoch": 4.8347765846899895, "grad_norm": 0.5506494641304016, "learning_rate": 5.167128987517337e-06, "loss": 0.0158, "step": 6979 }, { "epoch": 4.835469345341185, "grad_norm": 0.5054938793182373, "learning_rate": 5.166435506241332e-06, "loss": 0.0178, "step": 6980 }, { "epoch": 4.836162105992379, "grad_norm": 0.3828156888484955, "learning_rate": 5.1657420249653265e-06, "loss": 0.0136, "step": 6981 }, { "epoch": 4.8368548666435744, "grad_norm": 0.610649824142456, "learning_rate": 5.165048543689321e-06, "loss": 0.0203, "step": 6982 }, { "epoch": 4.83754762729477, "grad_norm": 0.5363022685050964, "learning_rate": 5.1643550624133155e-06, "loss": 0.0196, "step": 6983 }, { "epoch": 4.838240387945965, "grad_norm": 0.41401076316833496, "learning_rate": 5.163661581137309e-06, "loss": 0.0174, "step": 6984 }, { "epoch": 4.838933148597159, "grad_norm": 0.584434986114502, "learning_rate": 5.162968099861304e-06, "loss": 0.0152, "step": 6985 }, { "epoch": 4.839625909248355, "grad_norm": 0.5189743041992188, "learning_rate": 5.162274618585299e-06, "loss": 0.016, "step": 6986 }, { "epoch": 4.84031866989955, "grad_norm": 0.42707058787345886, "learning_rate": 5.161581137309293e-06, "loss": 0.0183, "step": 6987 }, { "epoch": 4.841011430550744, "grad_norm": 0.5152461528778076, "learning_rate": 5.160887656033288e-06, "loss": 0.0144, "step": 6988 }, { "epoch": 4.84170419120194, "grad_norm": 0.5386325120925903, "learning_rate": 5.160194174757282e-06, "loss": 0.0212, "step": 6989 }, { "epoch": 4.842396951853135, "grad_norm": 0.4806686043739319, "learning_rate": 5.159500693481277e-06, "loss": 0.019, "step": 6990 }, { "epoch": 4.84308971250433, "grad_norm": 0.48570263385772705, "learning_rate": 5.158807212205272e-06, "loss": 0.0184, "step": 6991 }, { "epoch": 4.843782473155525, "grad_norm": 0.45237165689468384, "learning_rate": 5.158113730929265e-06, "loss": 0.0127, "step": 6992 }, { "epoch": 4.84447523380672, "grad_norm": 0.5239438414573669, "learning_rate": 5.15742024965326e-06, "loss": 0.0167, "step": 6993 }, { "epoch": 4.845167994457915, "grad_norm": 0.5638341307640076, "learning_rate": 5.156726768377254e-06, "loss": 0.0243, "step": 6994 }, { "epoch": 4.8458607551091095, "grad_norm": 0.41066622734069824, "learning_rate": 5.156033287101249e-06, "loss": 0.0123, "step": 6995 }, { "epoch": 4.846553515760305, "grad_norm": 0.47895199060440063, "learning_rate": 5.155339805825244e-06, "loss": 0.0157, "step": 6996 }, { "epoch": 4.8472462764115, "grad_norm": 0.4531351327896118, "learning_rate": 5.154646324549237e-06, "loss": 0.0139, "step": 6997 }, { "epoch": 4.8479390370626945, "grad_norm": 0.43267518281936646, "learning_rate": 5.153952843273232e-06, "loss": 0.0165, "step": 6998 }, { "epoch": 4.84863179771389, "grad_norm": 0.49868276715278625, "learning_rate": 5.153259361997226e-06, "loss": 0.0131, "step": 6999 }, { "epoch": 4.849324558365085, "grad_norm": 0.3782249093055725, "learning_rate": 5.152565880721221e-06, "loss": 0.0114, "step": 7000 }, { "epoch": 4.850017319016279, "grad_norm": 0.4751749038696289, "learning_rate": 5.151872399445216e-06, "loss": 0.0144, "step": 7001 }, { "epoch": 4.850710079667475, "grad_norm": 0.4537191092967987, "learning_rate": 5.15117891816921e-06, "loss": 0.0148, "step": 7002 }, { "epoch": 4.85140284031867, "grad_norm": 0.5568353533744812, "learning_rate": 5.150485436893204e-06, "loss": 0.0181, "step": 7003 }, { "epoch": 4.852095600969865, "grad_norm": 0.360664963722229, "learning_rate": 5.149791955617198e-06, "loss": 0.0098, "step": 7004 }, { "epoch": 4.85278836162106, "grad_norm": 0.4307069182395935, "learning_rate": 5.149098474341193e-06, "loss": 0.0198, "step": 7005 }, { "epoch": 4.853481122272255, "grad_norm": 0.5229548811912537, "learning_rate": 5.148404993065188e-06, "loss": 0.0174, "step": 7006 }, { "epoch": 4.85417388292345, "grad_norm": 1.091782569885254, "learning_rate": 5.147711511789182e-06, "loss": 0.0196, "step": 7007 }, { "epoch": 4.854866643574645, "grad_norm": 0.36841440200805664, "learning_rate": 5.147018030513177e-06, "loss": 0.0113, "step": 7008 }, { "epoch": 4.85555940422584, "grad_norm": 0.5098832845687866, "learning_rate": 5.14632454923717e-06, "loss": 0.0131, "step": 7009 }, { "epoch": 4.856252164877035, "grad_norm": 0.534648597240448, "learning_rate": 5.145631067961165e-06, "loss": 0.0194, "step": 7010 }, { "epoch": 4.85694492552823, "grad_norm": 0.6079515814781189, "learning_rate": 5.14493758668516e-06, "loss": 0.0164, "step": 7011 }, { "epoch": 4.857637686179425, "grad_norm": 0.5154495239257812, "learning_rate": 5.144244105409154e-06, "loss": 0.0174, "step": 7012 }, { "epoch": 4.85833044683062, "grad_norm": 0.5022515058517456, "learning_rate": 5.143550624133149e-06, "loss": 0.0188, "step": 7013 }, { "epoch": 4.859023207481815, "grad_norm": 0.4352777302265167, "learning_rate": 5.142857142857142e-06, "loss": 0.0135, "step": 7014 }, { "epoch": 4.85971596813301, "grad_norm": 0.5344070196151733, "learning_rate": 5.142163661581137e-06, "loss": 0.0154, "step": 7015 }, { "epoch": 4.860408728784205, "grad_norm": 0.45540904998779297, "learning_rate": 5.141470180305132e-06, "loss": 0.0136, "step": 7016 }, { "epoch": 4.8611014894354, "grad_norm": 0.5667386054992676, "learning_rate": 5.140776699029126e-06, "loss": 0.0177, "step": 7017 }, { "epoch": 4.861794250086595, "grad_norm": 0.7090626358985901, "learning_rate": 5.140083217753121e-06, "loss": 0.0235, "step": 7018 }, { "epoch": 4.86248701073779, "grad_norm": 0.5996568202972412, "learning_rate": 5.139389736477115e-06, "loss": 0.0163, "step": 7019 }, { "epoch": 4.863179771388985, "grad_norm": 0.731913149356842, "learning_rate": 5.13869625520111e-06, "loss": 0.0196, "step": 7020 }, { "epoch": 4.86387253204018, "grad_norm": 0.7054145932197571, "learning_rate": 5.138002773925105e-06, "loss": 0.0182, "step": 7021 }, { "epoch": 4.864565292691375, "grad_norm": 0.5450630187988281, "learning_rate": 5.1373092926490985e-06, "loss": 0.0181, "step": 7022 }, { "epoch": 4.86525805334257, "grad_norm": 0.5680803060531616, "learning_rate": 5.136615811373093e-06, "loss": 0.0225, "step": 7023 }, { "epoch": 4.8659508139937655, "grad_norm": 0.43003177642822266, "learning_rate": 5.1359223300970875e-06, "loss": 0.0191, "step": 7024 }, { "epoch": 4.86664357464496, "grad_norm": 0.4550376832485199, "learning_rate": 5.135228848821082e-06, "loss": 0.014, "step": 7025 }, { "epoch": 4.867336335296155, "grad_norm": 0.46195662021636963, "learning_rate": 5.134535367545077e-06, "loss": 0.0146, "step": 7026 }, { "epoch": 4.8680290959473504, "grad_norm": 0.4369540512561798, "learning_rate": 5.133841886269071e-06, "loss": 0.0121, "step": 7027 }, { "epoch": 4.868721856598545, "grad_norm": 0.5560098886489868, "learning_rate": 5.1331484049930655e-06, "loss": 0.0205, "step": 7028 }, { "epoch": 4.86941461724974, "grad_norm": 0.5415427088737488, "learning_rate": 5.13245492371706e-06, "loss": 0.0164, "step": 7029 }, { "epoch": 4.870107377900935, "grad_norm": 0.6134937405586243, "learning_rate": 5.1317614424410545e-06, "loss": 0.0154, "step": 7030 }, { "epoch": 4.870800138552131, "grad_norm": 0.41532644629478455, "learning_rate": 5.1310679611650495e-06, "loss": 0.0137, "step": 7031 }, { "epoch": 4.871492899203325, "grad_norm": 0.34864580631256104, "learning_rate": 5.130374479889043e-06, "loss": 0.0122, "step": 7032 }, { "epoch": 4.87218565985452, "grad_norm": 0.5494161248207092, "learning_rate": 5.129680998613038e-06, "loss": 0.0199, "step": 7033 }, { "epoch": 4.872878420505716, "grad_norm": 0.6478584408760071, "learning_rate": 5.128987517337032e-06, "loss": 0.0191, "step": 7034 }, { "epoch": 4.87357118115691, "grad_norm": 0.43007680773735046, "learning_rate": 5.128294036061027e-06, "loss": 0.016, "step": 7035 }, { "epoch": 4.874263941808105, "grad_norm": 0.5305352210998535, "learning_rate": 5.127600554785022e-06, "loss": 0.0202, "step": 7036 }, { "epoch": 4.874956702459301, "grad_norm": 0.4242437481880188, "learning_rate": 5.126907073509016e-06, "loss": 0.0154, "step": 7037 }, { "epoch": 4.875649463110495, "grad_norm": 0.4685125946998596, "learning_rate": 5.126213592233011e-06, "loss": 0.0175, "step": 7038 }, { "epoch": 4.87634222376169, "grad_norm": 0.5354596972465515, "learning_rate": 5.125520110957004e-06, "loss": 0.0256, "step": 7039 }, { "epoch": 4.8770349844128855, "grad_norm": 0.6412429809570312, "learning_rate": 5.124826629680999e-06, "loss": 0.0184, "step": 7040 }, { "epoch": 4.87772774506408, "grad_norm": 0.4054080843925476, "learning_rate": 5.124133148404994e-06, "loss": 0.0148, "step": 7041 }, { "epoch": 4.878420505715275, "grad_norm": 0.4576794505119324, "learning_rate": 5.123439667128988e-06, "loss": 0.0165, "step": 7042 }, { "epoch": 4.8791132663664705, "grad_norm": 0.5380566120147705, "learning_rate": 5.122746185852983e-06, "loss": 0.0181, "step": 7043 }, { "epoch": 4.879806027017666, "grad_norm": 0.477628231048584, "learning_rate": 5.122052704576976e-06, "loss": 0.0243, "step": 7044 }, { "epoch": 4.88049878766886, "grad_norm": 0.7909729480743408, "learning_rate": 5.121359223300971e-06, "loss": 0.0232, "step": 7045 }, { "epoch": 4.881191548320055, "grad_norm": 0.7681084871292114, "learning_rate": 5.120665742024966e-06, "loss": 0.0165, "step": 7046 }, { "epoch": 4.881884308971251, "grad_norm": 0.49574559926986694, "learning_rate": 5.11997226074896e-06, "loss": 0.0181, "step": 7047 }, { "epoch": 4.882577069622445, "grad_norm": 0.46113428473472595, "learning_rate": 5.119278779472955e-06, "loss": 0.0126, "step": 7048 }, { "epoch": 4.88326983027364, "grad_norm": 0.5326622128486633, "learning_rate": 5.118585298196949e-06, "loss": 0.0183, "step": 7049 }, { "epoch": 4.883962590924836, "grad_norm": 0.4911978244781494, "learning_rate": 5.117891816920944e-06, "loss": 0.0113, "step": 7050 }, { "epoch": 4.884655351576031, "grad_norm": 0.5054808855056763, "learning_rate": 5.117198335644938e-06, "loss": 0.019, "step": 7051 }, { "epoch": 4.885348112227225, "grad_norm": 0.5193711519241333, "learning_rate": 5.116504854368932e-06, "loss": 0.0182, "step": 7052 }, { "epoch": 4.886040872878421, "grad_norm": 0.49985775351524353, "learning_rate": 5.115811373092927e-06, "loss": 0.0205, "step": 7053 }, { "epoch": 4.886733633529616, "grad_norm": 0.5126142501831055, "learning_rate": 5.115117891816921e-06, "loss": 0.016, "step": 7054 }, { "epoch": 4.88742639418081, "grad_norm": 0.41101741790771484, "learning_rate": 5.114424410540916e-06, "loss": 0.0127, "step": 7055 }, { "epoch": 4.8881191548320055, "grad_norm": 0.5187044143676758, "learning_rate": 5.113730929264911e-06, "loss": 0.0124, "step": 7056 }, { "epoch": 4.888811915483201, "grad_norm": 0.6034947633743286, "learning_rate": 5.113037447988904e-06, "loss": 0.0239, "step": 7057 }, { "epoch": 4.889504676134395, "grad_norm": 0.5134703516960144, "learning_rate": 5.112343966712899e-06, "loss": 0.0142, "step": 7058 }, { "epoch": 4.8901974367855905, "grad_norm": 0.5486952066421509, "learning_rate": 5.111650485436893e-06, "loss": 0.0214, "step": 7059 }, { "epoch": 4.890890197436786, "grad_norm": 0.5766928791999817, "learning_rate": 5.110957004160888e-06, "loss": 0.0163, "step": 7060 }, { "epoch": 4.89158295808798, "grad_norm": 0.5219733119010925, "learning_rate": 5.110263522884883e-06, "loss": 0.0164, "step": 7061 }, { "epoch": 4.892275718739175, "grad_norm": 0.44417136907577515, "learning_rate": 5.109570041608876e-06, "loss": 0.0161, "step": 7062 }, { "epoch": 4.892968479390371, "grad_norm": 0.5025767683982849, "learning_rate": 5.108876560332871e-06, "loss": 0.0152, "step": 7063 }, { "epoch": 4.893661240041566, "grad_norm": 0.5078673958778381, "learning_rate": 5.108183079056865e-06, "loss": 0.0146, "step": 7064 }, { "epoch": 4.89435400069276, "grad_norm": 0.5547715425491333, "learning_rate": 5.10748959778086e-06, "loss": 0.0159, "step": 7065 }, { "epoch": 4.895046761343956, "grad_norm": 0.5483763217926025, "learning_rate": 5.106796116504855e-06, "loss": 0.0159, "step": 7066 }, { "epoch": 4.895739521995151, "grad_norm": 0.506777822971344, "learning_rate": 5.106102635228849e-06, "loss": 0.0229, "step": 7067 }, { "epoch": 4.896432282646345, "grad_norm": 0.5792956948280334, "learning_rate": 5.105409153952844e-06, "loss": 0.0167, "step": 7068 }, { "epoch": 4.897125043297541, "grad_norm": 0.47799813747406006, "learning_rate": 5.1047156726768375e-06, "loss": 0.0175, "step": 7069 }, { "epoch": 4.897817803948736, "grad_norm": 0.4106331467628479, "learning_rate": 5.104022191400832e-06, "loss": 0.0137, "step": 7070 }, { "epoch": 4.898510564599931, "grad_norm": 0.5419927835464478, "learning_rate": 5.103328710124827e-06, "loss": 0.0181, "step": 7071 }, { "epoch": 4.899203325251126, "grad_norm": 0.8085773587226868, "learning_rate": 5.102635228848821e-06, "loss": 0.0216, "step": 7072 }, { "epoch": 4.899896085902321, "grad_norm": 0.43125635385513306, "learning_rate": 5.101941747572816e-06, "loss": 0.0208, "step": 7073 }, { "epoch": 4.900588846553516, "grad_norm": 0.49862387776374817, "learning_rate": 5.10124826629681e-06, "loss": 0.0141, "step": 7074 }, { "epoch": 4.9012816072047105, "grad_norm": 0.4481689929962158, "learning_rate": 5.1005547850208045e-06, "loss": 0.0125, "step": 7075 }, { "epoch": 4.901974367855906, "grad_norm": 0.6605470776557922, "learning_rate": 5.0998613037447995e-06, "loss": 0.0205, "step": 7076 }, { "epoch": 4.902667128507101, "grad_norm": 0.5066818594932556, "learning_rate": 5.0991678224687935e-06, "loss": 0.0146, "step": 7077 }, { "epoch": 4.9033598891582955, "grad_norm": 0.515537679195404, "learning_rate": 5.0984743411927885e-06, "loss": 0.0155, "step": 7078 }, { "epoch": 4.904052649809491, "grad_norm": 0.5556579828262329, "learning_rate": 5.0977808599167826e-06, "loss": 0.021, "step": 7079 }, { "epoch": 4.904745410460686, "grad_norm": 0.45318371057510376, "learning_rate": 5.0970873786407775e-06, "loss": 0.0129, "step": 7080 }, { "epoch": 4.90543817111188, "grad_norm": 0.5929465889930725, "learning_rate": 5.096393897364772e-06, "loss": 0.0168, "step": 7081 }, { "epoch": 4.906130931763076, "grad_norm": 0.4716765284538269, "learning_rate": 5.095700416088766e-06, "loss": 0.0116, "step": 7082 }, { "epoch": 4.906823692414271, "grad_norm": 0.49768924713134766, "learning_rate": 5.095006934812761e-06, "loss": 0.0179, "step": 7083 }, { "epoch": 4.907516453065466, "grad_norm": 0.37211477756500244, "learning_rate": 5.094313453536755e-06, "loss": 0.0109, "step": 7084 }, { "epoch": 4.908209213716661, "grad_norm": 0.41021373867988586, "learning_rate": 5.09361997226075e-06, "loss": 0.0143, "step": 7085 }, { "epoch": 4.908901974367856, "grad_norm": 0.4520041346549988, "learning_rate": 5.0929264909847446e-06, "loss": 0.0119, "step": 7086 }, { "epoch": 4.909594735019051, "grad_norm": 0.47611114382743835, "learning_rate": 5.092233009708738e-06, "loss": 0.0188, "step": 7087 }, { "epoch": 4.910287495670246, "grad_norm": 0.5464442372322083, "learning_rate": 5.091539528432733e-06, "loss": 0.0184, "step": 7088 }, { "epoch": 4.910980256321441, "grad_norm": 0.5857868790626526, "learning_rate": 5.090846047156727e-06, "loss": 0.0218, "step": 7089 }, { "epoch": 4.911673016972636, "grad_norm": 0.4725109338760376, "learning_rate": 5.090152565880722e-06, "loss": 0.0167, "step": 7090 }, { "epoch": 4.912365777623831, "grad_norm": 0.42381516098976135, "learning_rate": 5.089459084604717e-06, "loss": 0.0164, "step": 7091 }, { "epoch": 4.913058538275026, "grad_norm": 0.40553903579711914, "learning_rate": 5.08876560332871e-06, "loss": 0.012, "step": 7092 }, { "epoch": 4.913751298926221, "grad_norm": 0.6457179188728333, "learning_rate": 5.088072122052705e-06, "loss": 0.0164, "step": 7093 }, { "epoch": 4.914444059577416, "grad_norm": 0.39204317331314087, "learning_rate": 5.087378640776699e-06, "loss": 0.0122, "step": 7094 }, { "epoch": 4.915136820228611, "grad_norm": 0.5320116877555847, "learning_rate": 5.086685159500694e-06, "loss": 0.0161, "step": 7095 }, { "epoch": 4.915829580879806, "grad_norm": 0.42274361848831177, "learning_rate": 5.085991678224689e-06, "loss": 0.0129, "step": 7096 }, { "epoch": 4.916522341531001, "grad_norm": 0.42949315905570984, "learning_rate": 5.085298196948683e-06, "loss": 0.0153, "step": 7097 }, { "epoch": 4.917215102182196, "grad_norm": 0.49816930294036865, "learning_rate": 5.084604715672678e-06, "loss": 0.0195, "step": 7098 }, { "epoch": 4.917907862833391, "grad_norm": 0.449648916721344, "learning_rate": 5.083911234396671e-06, "loss": 0.014, "step": 7099 }, { "epoch": 4.918600623484586, "grad_norm": 0.5509673357009888, "learning_rate": 5.083217753120666e-06, "loss": 0.0162, "step": 7100 }, { "epoch": 4.919293384135781, "grad_norm": 0.487145334482193, "learning_rate": 5.082524271844661e-06, "loss": 0.0166, "step": 7101 }, { "epoch": 4.919986144786976, "grad_norm": 0.4487362205982208, "learning_rate": 5.081830790568655e-06, "loss": 0.0122, "step": 7102 }, { "epoch": 4.920678905438171, "grad_norm": 0.4550587832927704, "learning_rate": 5.08113730929265e-06, "loss": 0.021, "step": 7103 }, { "epoch": 4.9213716660893665, "grad_norm": 0.6630377769470215, "learning_rate": 5.080443828016643e-06, "loss": 0.0275, "step": 7104 }, { "epoch": 4.922064426740561, "grad_norm": 0.5580195188522339, "learning_rate": 5.079750346740638e-06, "loss": 0.0157, "step": 7105 }, { "epoch": 4.922757187391756, "grad_norm": 0.4646613895893097, "learning_rate": 5.079056865464633e-06, "loss": 0.017, "step": 7106 }, { "epoch": 4.923449948042951, "grad_norm": 0.5144959688186646, "learning_rate": 5.078363384188627e-06, "loss": 0.0153, "step": 7107 }, { "epoch": 4.924142708694146, "grad_norm": 0.5957618951797485, "learning_rate": 5.077669902912622e-06, "loss": 0.0207, "step": 7108 }, { "epoch": 4.924835469345341, "grad_norm": 0.4713325798511505, "learning_rate": 5.076976421636616e-06, "loss": 0.0189, "step": 7109 }, { "epoch": 4.925528229996536, "grad_norm": 0.486539751291275, "learning_rate": 5.07628294036061e-06, "loss": 0.0168, "step": 7110 }, { "epoch": 4.926220990647732, "grad_norm": 0.5931656360626221, "learning_rate": 5.075589459084605e-06, "loss": 0.0189, "step": 7111 }, { "epoch": 4.926913751298926, "grad_norm": 0.4117998778820038, "learning_rate": 5.074895977808599e-06, "loss": 0.0146, "step": 7112 }, { "epoch": 4.927606511950121, "grad_norm": 0.42907875776290894, "learning_rate": 5.074202496532594e-06, "loss": 0.0121, "step": 7113 }, { "epoch": 4.928299272601317, "grad_norm": 0.4469527006149292, "learning_rate": 5.073509015256588e-06, "loss": 0.0122, "step": 7114 }, { "epoch": 4.928992033252511, "grad_norm": 0.4878230094909668, "learning_rate": 5.072815533980583e-06, "loss": 0.0184, "step": 7115 }, { "epoch": 4.929684793903706, "grad_norm": 0.6271370053291321, "learning_rate": 5.072122052704578e-06, "loss": 0.0149, "step": 7116 }, { "epoch": 4.930377554554902, "grad_norm": 0.4054403007030487, "learning_rate": 5.071428571428571e-06, "loss": 0.0113, "step": 7117 }, { "epoch": 4.931070315206096, "grad_norm": 0.52023845911026, "learning_rate": 5.070735090152566e-06, "loss": 0.0193, "step": 7118 }, { "epoch": 4.931763075857291, "grad_norm": 0.38854172825813293, "learning_rate": 5.07004160887656e-06, "loss": 0.0128, "step": 7119 }, { "epoch": 4.9324558365084865, "grad_norm": 0.42394208908081055, "learning_rate": 5.069348127600555e-06, "loss": 0.0149, "step": 7120 }, { "epoch": 4.933148597159681, "grad_norm": 0.37314775586128235, "learning_rate": 5.06865464632455e-06, "loss": 0.0103, "step": 7121 }, { "epoch": 4.933841357810876, "grad_norm": 0.3798964321613312, "learning_rate": 5.0679611650485435e-06, "loss": 0.017, "step": 7122 }, { "epoch": 4.9345341184620715, "grad_norm": 0.6011042594909668, "learning_rate": 5.0672676837725385e-06, "loss": 0.0244, "step": 7123 }, { "epoch": 4.935226879113267, "grad_norm": 0.5264334678649902, "learning_rate": 5.0665742024965325e-06, "loss": 0.0184, "step": 7124 }, { "epoch": 4.935919639764461, "grad_norm": 0.5989033579826355, "learning_rate": 5.0658807212205275e-06, "loss": 0.0165, "step": 7125 }, { "epoch": 4.936612400415656, "grad_norm": 0.6535217761993408, "learning_rate": 5.065187239944522e-06, "loss": 0.0165, "step": 7126 }, { "epoch": 4.937305161066852, "grad_norm": 0.44317764043807983, "learning_rate": 5.0644937586685165e-06, "loss": 0.0117, "step": 7127 }, { "epoch": 4.937997921718046, "grad_norm": 0.4648471772670746, "learning_rate": 5.0638002773925114e-06, "loss": 0.0141, "step": 7128 }, { "epoch": 4.938690682369241, "grad_norm": 0.7307034134864807, "learning_rate": 5.063106796116505e-06, "loss": 0.0212, "step": 7129 }, { "epoch": 4.939383443020437, "grad_norm": 0.40065693855285645, "learning_rate": 5.0624133148405e-06, "loss": 0.0143, "step": 7130 }, { "epoch": 4.940076203671632, "grad_norm": 0.4784284830093384, "learning_rate": 5.0617198335644945e-06, "loss": 0.0111, "step": 7131 }, { "epoch": 4.940768964322826, "grad_norm": 0.4007522165775299, "learning_rate": 5.061026352288489e-06, "loss": 0.0106, "step": 7132 }, { "epoch": 4.941461724974022, "grad_norm": 0.4763968586921692, "learning_rate": 5.0603328710124836e-06, "loss": 0.0157, "step": 7133 }, { "epoch": 4.942154485625217, "grad_norm": 0.7442196607589722, "learning_rate": 5.059639389736477e-06, "loss": 0.0147, "step": 7134 }, { "epoch": 4.942847246276411, "grad_norm": 0.48007553815841675, "learning_rate": 5.058945908460472e-06, "loss": 0.0145, "step": 7135 }, { "epoch": 4.9435400069276065, "grad_norm": 0.5864841938018799, "learning_rate": 5.058252427184467e-06, "loss": 0.0156, "step": 7136 }, { "epoch": 4.944232767578802, "grad_norm": 0.5981998443603516, "learning_rate": 5.057558945908461e-06, "loss": 0.0239, "step": 7137 }, { "epoch": 4.944925528229996, "grad_norm": 0.5885744094848633, "learning_rate": 5.056865464632456e-06, "loss": 0.0199, "step": 7138 }, { "epoch": 4.9456182888811915, "grad_norm": 0.41168013215065, "learning_rate": 5.056171983356449e-06, "loss": 0.013, "step": 7139 }, { "epoch": 4.946311049532387, "grad_norm": 0.5568606853485107, "learning_rate": 5.055478502080444e-06, "loss": 0.0224, "step": 7140 }, { "epoch": 4.947003810183581, "grad_norm": 0.6031033396720886, "learning_rate": 5.054785020804439e-06, "loss": 0.0225, "step": 7141 }, { "epoch": 4.947696570834776, "grad_norm": 0.49166953563690186, "learning_rate": 5.054091539528433e-06, "loss": 0.0176, "step": 7142 }, { "epoch": 4.948389331485972, "grad_norm": 0.4761636555194855, "learning_rate": 5.053398058252428e-06, "loss": 0.0206, "step": 7143 }, { "epoch": 4.949082092137167, "grad_norm": 0.4680408537387848, "learning_rate": 5.052704576976422e-06, "loss": 0.0131, "step": 7144 }, { "epoch": 4.949774852788361, "grad_norm": 0.5143632888793945, "learning_rate": 5.052011095700417e-06, "loss": 0.0181, "step": 7145 }, { "epoch": 4.950467613439557, "grad_norm": 0.44326531887054443, "learning_rate": 5.051317614424412e-06, "loss": 0.0128, "step": 7146 }, { "epoch": 4.951160374090752, "grad_norm": 0.6614006757736206, "learning_rate": 5.050624133148405e-06, "loss": 0.0143, "step": 7147 }, { "epoch": 4.951853134741946, "grad_norm": 0.6010259389877319, "learning_rate": 5.0499306518724e-06, "loss": 0.0217, "step": 7148 }, { "epoch": 4.952545895393142, "grad_norm": 0.5312649607658386, "learning_rate": 5.049237170596394e-06, "loss": 0.0177, "step": 7149 }, { "epoch": 4.953238656044337, "grad_norm": 0.4602656960487366, "learning_rate": 5.048543689320389e-06, "loss": 0.015, "step": 7150 }, { "epoch": 4.953931416695532, "grad_norm": 0.4423941373825073, "learning_rate": 5.047850208044384e-06, "loss": 0.0111, "step": 7151 }, { "epoch": 4.9546241773467266, "grad_norm": 0.608744740486145, "learning_rate": 5.047156726768377e-06, "loss": 0.0155, "step": 7152 }, { "epoch": 4.955316937997922, "grad_norm": 0.5473113059997559, "learning_rate": 5.046463245492372e-06, "loss": 0.0165, "step": 7153 }, { "epoch": 4.956009698649117, "grad_norm": 0.6941458582878113, "learning_rate": 5.045769764216366e-06, "loss": 0.0142, "step": 7154 }, { "epoch": 4.9567024593003115, "grad_norm": 0.500970184803009, "learning_rate": 5.045076282940361e-06, "loss": 0.0227, "step": 7155 }, { "epoch": 4.957395219951507, "grad_norm": 0.4593062698841095, "learning_rate": 5.044382801664356e-06, "loss": 0.0153, "step": 7156 }, { "epoch": 4.958087980602702, "grad_norm": 0.6693342924118042, "learning_rate": 5.04368932038835e-06, "loss": 0.0138, "step": 7157 }, { "epoch": 4.9587807412538965, "grad_norm": 0.5365694761276245, "learning_rate": 5.042995839112344e-06, "loss": 0.0173, "step": 7158 }, { "epoch": 4.959473501905092, "grad_norm": 0.37168315052986145, "learning_rate": 5.042302357836338e-06, "loss": 0.0132, "step": 7159 }, { "epoch": 4.960166262556287, "grad_norm": 0.489751398563385, "learning_rate": 5.041608876560333e-06, "loss": 0.0153, "step": 7160 }, { "epoch": 4.960859023207481, "grad_norm": 0.8007532358169556, "learning_rate": 5.040915395284328e-06, "loss": 0.0175, "step": 7161 }, { "epoch": 4.961551783858677, "grad_norm": 0.41315120458602905, "learning_rate": 5.040221914008322e-06, "loss": 0.0164, "step": 7162 }, { "epoch": 4.962244544509872, "grad_norm": 0.4592771828174591, "learning_rate": 5.039528432732317e-06, "loss": 0.0137, "step": 7163 }, { "epoch": 4.962937305161067, "grad_norm": 0.5716656446456909, "learning_rate": 5.03883495145631e-06, "loss": 0.0232, "step": 7164 }, { "epoch": 4.963630065812262, "grad_norm": 0.5507204532623291, "learning_rate": 5.038141470180305e-06, "loss": 0.0113, "step": 7165 }, { "epoch": 4.964322826463457, "grad_norm": 0.46243104338645935, "learning_rate": 5.0374479889043e-06, "loss": 0.0174, "step": 7166 }, { "epoch": 4.965015587114652, "grad_norm": 0.4487704634666443, "learning_rate": 5.036754507628294e-06, "loss": 0.0134, "step": 7167 }, { "epoch": 4.965708347765847, "grad_norm": 0.5466247797012329, "learning_rate": 5.036061026352289e-06, "loss": 0.0175, "step": 7168 }, { "epoch": 4.966401108417042, "grad_norm": 0.48058101534843445, "learning_rate": 5.0353675450762825e-06, "loss": 0.0165, "step": 7169 }, { "epoch": 4.967093869068237, "grad_norm": 0.5494735240936279, "learning_rate": 5.0346740638002775e-06, "loss": 0.0234, "step": 7170 }, { "epoch": 4.967786629719432, "grad_norm": 0.5059705972671509, "learning_rate": 5.033980582524272e-06, "loss": 0.0162, "step": 7171 }, { "epoch": 4.968479390370627, "grad_norm": 0.5217702388763428, "learning_rate": 5.0332871012482665e-06, "loss": 0.0205, "step": 7172 }, { "epoch": 4.969172151021822, "grad_norm": 0.6256882548332214, "learning_rate": 5.032593619972261e-06, "loss": 0.0234, "step": 7173 }, { "epoch": 4.969864911673017, "grad_norm": 0.5773966908454895, "learning_rate": 5.0319001386962555e-06, "loss": 0.0195, "step": 7174 }, { "epoch": 4.970557672324212, "grad_norm": 0.5373857021331787, "learning_rate": 5.0312066574202504e-06, "loss": 0.012, "step": 7175 }, { "epoch": 4.971250432975407, "grad_norm": 0.48725610971450806, "learning_rate": 5.030513176144245e-06, "loss": 0.0126, "step": 7176 }, { "epoch": 4.971943193626602, "grad_norm": 0.6250792741775513, "learning_rate": 5.029819694868239e-06, "loss": 0.0192, "step": 7177 }, { "epoch": 4.972635954277797, "grad_norm": 0.4702279269695282, "learning_rate": 5.0291262135922335e-06, "loss": 0.0169, "step": 7178 }, { "epoch": 4.973328714928992, "grad_norm": 0.3435475826263428, "learning_rate": 5.028432732316228e-06, "loss": 0.0124, "step": 7179 }, { "epoch": 4.974021475580187, "grad_norm": 0.6400315761566162, "learning_rate": 5.0277392510402226e-06, "loss": 0.0115, "step": 7180 }, { "epoch": 4.974714236231382, "grad_norm": 0.5794646143913269, "learning_rate": 5.0270457697642175e-06, "loss": 0.0223, "step": 7181 }, { "epoch": 4.975406996882577, "grad_norm": 0.38571569323539734, "learning_rate": 5.026352288488211e-06, "loss": 0.0114, "step": 7182 }, { "epoch": 4.976099757533772, "grad_norm": 0.5065745711326599, "learning_rate": 5.025658807212206e-06, "loss": 0.0219, "step": 7183 }, { "epoch": 4.976792518184967, "grad_norm": 0.4838719069957733, "learning_rate": 5.0249653259362e-06, "loss": 0.0161, "step": 7184 }, { "epoch": 4.977485278836162, "grad_norm": 0.5162280201911926, "learning_rate": 5.024271844660195e-06, "loss": 0.0189, "step": 7185 }, { "epoch": 4.978178039487357, "grad_norm": 0.3886980712413788, "learning_rate": 5.02357836338419e-06, "loss": 0.0097, "step": 7186 }, { "epoch": 4.978870800138552, "grad_norm": 0.5071686506271362, "learning_rate": 5.022884882108183e-06, "loss": 0.0181, "step": 7187 }, { "epoch": 4.979563560789747, "grad_norm": 0.5926240086555481, "learning_rate": 5.022191400832178e-06, "loss": 0.018, "step": 7188 }, { "epoch": 4.980256321440942, "grad_norm": 0.3423044979572296, "learning_rate": 5.021497919556172e-06, "loss": 0.0099, "step": 7189 }, { "epoch": 4.980949082092137, "grad_norm": 0.5712215900421143, "learning_rate": 5.020804438280167e-06, "loss": 0.0191, "step": 7190 }, { "epoch": 4.981641842743333, "grad_norm": 0.42569872736930847, "learning_rate": 5.020110957004162e-06, "loss": 0.0149, "step": 7191 }, { "epoch": 4.982334603394527, "grad_norm": 0.5604143738746643, "learning_rate": 5.019417475728156e-06, "loss": 0.0145, "step": 7192 }, { "epoch": 4.983027364045722, "grad_norm": 0.5349857807159424, "learning_rate": 5.018723994452151e-06, "loss": 0.0178, "step": 7193 }, { "epoch": 4.983720124696918, "grad_norm": 0.5973315238952637, "learning_rate": 5.018030513176144e-06, "loss": 0.0188, "step": 7194 }, { "epoch": 4.984412885348112, "grad_norm": 0.40893352031707764, "learning_rate": 5.017337031900139e-06, "loss": 0.0139, "step": 7195 }, { "epoch": 4.985105645999307, "grad_norm": 0.39439642429351807, "learning_rate": 5.016643550624134e-06, "loss": 0.0125, "step": 7196 }, { "epoch": 4.985798406650503, "grad_norm": 0.4668135643005371, "learning_rate": 5.015950069348128e-06, "loss": 0.0107, "step": 7197 }, { "epoch": 4.986491167301697, "grad_norm": 0.5245904922485352, "learning_rate": 5.015256588072123e-06, "loss": 0.0226, "step": 7198 }, { "epoch": 4.987183927952892, "grad_norm": 0.6042958498001099, "learning_rate": 5.014563106796116e-06, "loss": 0.0191, "step": 7199 }, { "epoch": 4.9878766886040875, "grad_norm": 0.553793728351593, "learning_rate": 5.013869625520111e-06, "loss": 0.0203, "step": 7200 }, { "epoch": 4.988569449255282, "grad_norm": 0.5331743955612183, "learning_rate": 5.013176144244106e-06, "loss": 0.0149, "step": 7201 }, { "epoch": 4.989262209906477, "grad_norm": 0.6598172783851624, "learning_rate": 5.0124826629681e-06, "loss": 0.0151, "step": 7202 }, { "epoch": 4.9899549705576725, "grad_norm": 0.5911411046981812, "learning_rate": 5.011789181692095e-06, "loss": 0.0174, "step": 7203 }, { "epoch": 4.990647731208867, "grad_norm": 0.4001295268535614, "learning_rate": 5.011095700416089e-06, "loss": 0.0133, "step": 7204 }, { "epoch": 4.991340491860062, "grad_norm": 0.4572732448577881, "learning_rate": 5.010402219140084e-06, "loss": 0.0116, "step": 7205 }, { "epoch": 4.992033252511257, "grad_norm": 0.635037899017334, "learning_rate": 5.009708737864078e-06, "loss": 0.0109, "step": 7206 }, { "epoch": 4.992726013162453, "grad_norm": 0.6858110427856445, "learning_rate": 5.009015256588072e-06, "loss": 0.0138, "step": 7207 }, { "epoch": 4.993418773813647, "grad_norm": 0.48281699419021606, "learning_rate": 5.008321775312067e-06, "loss": 0.0145, "step": 7208 }, { "epoch": 4.994111534464842, "grad_norm": 0.5513356924057007, "learning_rate": 5.007628294036061e-06, "loss": 0.0131, "step": 7209 }, { "epoch": 4.994804295116038, "grad_norm": 0.6060163378715515, "learning_rate": 5.006934812760056e-06, "loss": 0.0181, "step": 7210 }, { "epoch": 4.995497055767233, "grad_norm": 0.4762422442436218, "learning_rate": 5.006241331484051e-06, "loss": 0.0167, "step": 7211 }, { "epoch": 4.996189816418427, "grad_norm": 0.44957783818244934, "learning_rate": 5.005547850208044e-06, "loss": 0.0188, "step": 7212 }, { "epoch": 4.996882577069623, "grad_norm": 0.48919323086738586, "learning_rate": 5.004854368932039e-06, "loss": 0.0158, "step": 7213 }, { "epoch": 4.997575337720818, "grad_norm": 0.4546241760253906, "learning_rate": 5.004160887656033e-06, "loss": 0.0161, "step": 7214 }, { "epoch": 4.998268098372012, "grad_norm": 0.6716766357421875, "learning_rate": 5.003467406380028e-06, "loss": 0.0199, "step": 7215 }, { "epoch": 4.9989608590232075, "grad_norm": 0.5103355050086975, "learning_rate": 5.002773925104023e-06, "loss": 0.0185, "step": 7216 }, { "epoch": 4.999653619674403, "grad_norm": 0.4889625906944275, "learning_rate": 5.0020804438280165e-06, "loss": 0.0197, "step": 7217 }, { "epoch": 4.999653619674403, "eval_loss": 0.2697892189025879, "eval_runtime": 7671.3155, "eval_samples_per_second": 1.043, "eval_steps_per_second": 0.033, "eval_wer": 12.525478411777724, "step": 7217 }, { "epoch": 5.000346380325597, "grad_norm": 0.45957180857658386, "learning_rate": 5.001386962552011e-06, "loss": 0.0137, "step": 7218 }, { "epoch": 5.0010391409767925, "grad_norm": 0.3669784963130951, "learning_rate": 5.0006934812760055e-06, "loss": 0.0076, "step": 7219 }, { "epoch": 5.001731901627988, "grad_norm": 0.36706864833831787, "learning_rate": 5e-06, "loss": 0.0104, "step": 7220 }, { "epoch": 5.002424662279182, "grad_norm": 0.43076092004776, "learning_rate": 4.9993065187239945e-06, "loss": 0.01, "step": 7221 }, { "epoch": 5.003117422930377, "grad_norm": 0.32632872462272644, "learning_rate": 4.9986130374479894e-06, "loss": 0.0072, "step": 7222 }, { "epoch": 5.003810183581573, "grad_norm": 0.3419957458972931, "learning_rate": 4.997919556171984e-06, "loss": 0.0128, "step": 7223 }, { "epoch": 5.004502944232768, "grad_norm": 0.29304713010787964, "learning_rate": 4.9972260748959784e-06, "loss": 0.0087, "step": 7224 }, { "epoch": 5.005195704883962, "grad_norm": 0.34947752952575684, "learning_rate": 4.9965325936199725e-06, "loss": 0.0088, "step": 7225 }, { "epoch": 5.005888465535158, "grad_norm": 0.29535210132598877, "learning_rate": 4.995839112343967e-06, "loss": 0.0076, "step": 7226 }, { "epoch": 5.006581226186353, "grad_norm": 0.4918957054615021, "learning_rate": 4.9951456310679616e-06, "loss": 0.0122, "step": 7227 }, { "epoch": 5.007273986837547, "grad_norm": 0.277309775352478, "learning_rate": 4.9944521497919565e-06, "loss": 0.0065, "step": 7228 }, { "epoch": 5.007966747488743, "grad_norm": 0.4154784083366394, "learning_rate": 4.9937586685159506e-06, "loss": 0.0099, "step": 7229 }, { "epoch": 5.008659508139938, "grad_norm": 0.25265270471572876, "learning_rate": 4.993065187239945e-06, "loss": 0.0062, "step": 7230 }, { "epoch": 5.009352268791132, "grad_norm": 0.2866215407848358, "learning_rate": 4.99237170596394e-06, "loss": 0.0094, "step": 7231 }, { "epoch": 5.0100450294423275, "grad_norm": 0.30181148648262024, "learning_rate": 4.991678224687934e-06, "loss": 0.0072, "step": 7232 }, { "epoch": 5.010737790093523, "grad_norm": 0.3594330847263336, "learning_rate": 4.990984743411929e-06, "loss": 0.0087, "step": 7233 }, { "epoch": 5.011430550744718, "grad_norm": 0.26650872826576233, "learning_rate": 4.990291262135923e-06, "loss": 0.0053, "step": 7234 }, { "epoch": 5.0121233113959125, "grad_norm": 0.5954672694206238, "learning_rate": 4.989597780859917e-06, "loss": 0.0079, "step": 7235 }, { "epoch": 5.012816072047108, "grad_norm": 0.27336519956588745, "learning_rate": 4.988904299583912e-06, "loss": 0.006, "step": 7236 }, { "epoch": 5.013508832698303, "grad_norm": 0.3480377495288849, "learning_rate": 4.988210818307906e-06, "loss": 0.0085, "step": 7237 }, { "epoch": 5.014201593349497, "grad_norm": 0.38182446360588074, "learning_rate": 4.987517337031901e-06, "loss": 0.0091, "step": 7238 }, { "epoch": 5.014894354000693, "grad_norm": 0.36752888560295105, "learning_rate": 4.986823855755895e-06, "loss": 0.007, "step": 7239 }, { "epoch": 5.015587114651888, "grad_norm": 0.30378803610801697, "learning_rate": 4.98613037447989e-06, "loss": 0.0075, "step": 7240 }, { "epoch": 5.016279875303082, "grad_norm": 0.31696900725364685, "learning_rate": 4.985436893203884e-06, "loss": 0.0081, "step": 7241 }, { "epoch": 5.016972635954278, "grad_norm": 0.23790748417377472, "learning_rate": 4.984743411927878e-06, "loss": 0.0062, "step": 7242 }, { "epoch": 5.017665396605473, "grad_norm": 0.3500445783138275, "learning_rate": 4.984049930651873e-06, "loss": 0.0069, "step": 7243 }, { "epoch": 5.018358157256668, "grad_norm": 0.31131619215011597, "learning_rate": 4.983356449375868e-06, "loss": 0.0064, "step": 7244 }, { "epoch": 5.019050917907863, "grad_norm": 0.31733691692352295, "learning_rate": 4.982662968099862e-06, "loss": 0.0074, "step": 7245 }, { "epoch": 5.019743678559058, "grad_norm": 0.3574613928794861, "learning_rate": 4.981969486823856e-06, "loss": 0.0102, "step": 7246 }, { "epoch": 5.020436439210253, "grad_norm": 0.2864325940608978, "learning_rate": 4.98127600554785e-06, "loss": 0.0069, "step": 7247 }, { "epoch": 5.021129199861448, "grad_norm": 0.24767273664474487, "learning_rate": 4.980582524271845e-06, "loss": 0.0059, "step": 7248 }, { "epoch": 5.021821960512643, "grad_norm": 0.5374817848205566, "learning_rate": 4.97988904299584e-06, "loss": 0.0096, "step": 7249 }, { "epoch": 5.022514721163838, "grad_norm": 0.5407879948616028, "learning_rate": 4.979195561719834e-06, "loss": 0.0123, "step": 7250 }, { "epoch": 5.0232074818150325, "grad_norm": 0.30810102820396423, "learning_rate": 4.978502080443828e-06, "loss": 0.0075, "step": 7251 }, { "epoch": 5.023900242466228, "grad_norm": 0.26572948694229126, "learning_rate": 4.977808599167823e-06, "loss": 0.0069, "step": 7252 }, { "epoch": 5.024593003117423, "grad_norm": 0.3364541232585907, "learning_rate": 4.977115117891818e-06, "loss": 0.0067, "step": 7253 }, { "epoch": 5.025285763768618, "grad_norm": 0.3556959629058838, "learning_rate": 4.976421636615812e-06, "loss": 0.0058, "step": 7254 }, { "epoch": 5.025978524419813, "grad_norm": 0.25573965907096863, "learning_rate": 4.975728155339806e-06, "loss": 0.0057, "step": 7255 }, { "epoch": 5.026671285071008, "grad_norm": 0.315265417098999, "learning_rate": 4.9750346740638e-06, "loss": 0.008, "step": 7256 }, { "epoch": 5.027364045722203, "grad_norm": 0.5426953434944153, "learning_rate": 4.974341192787795e-06, "loss": 0.009, "step": 7257 }, { "epoch": 5.028056806373398, "grad_norm": 0.312299907207489, "learning_rate": 4.97364771151179e-06, "loss": 0.0067, "step": 7258 }, { "epoch": 5.028749567024593, "grad_norm": 0.24006272852420807, "learning_rate": 4.972954230235784e-06, "loss": 0.0062, "step": 7259 }, { "epoch": 5.029442327675788, "grad_norm": 0.3840598165988922, "learning_rate": 4.972260748959778e-06, "loss": 0.0084, "step": 7260 }, { "epoch": 5.030135088326983, "grad_norm": 0.23196850717067719, "learning_rate": 4.971567267683773e-06, "loss": 0.0065, "step": 7261 }, { "epoch": 5.030827848978178, "grad_norm": 0.27836501598358154, "learning_rate": 4.970873786407767e-06, "loss": 0.0065, "step": 7262 }, { "epoch": 5.031520609629373, "grad_norm": 0.3527560532093048, "learning_rate": 4.970180305131762e-06, "loss": 0.0079, "step": 7263 }, { "epoch": 5.0322133702805685, "grad_norm": 0.41922900080680847, "learning_rate": 4.969486823855756e-06, "loss": 0.0105, "step": 7264 }, { "epoch": 5.032906130931763, "grad_norm": 0.27435484528541565, "learning_rate": 4.96879334257975e-06, "loss": 0.0064, "step": 7265 }, { "epoch": 5.033598891582958, "grad_norm": 0.32516488432884216, "learning_rate": 4.968099861303745e-06, "loss": 0.0071, "step": 7266 }, { "epoch": 5.034291652234153, "grad_norm": 0.37962162494659424, "learning_rate": 4.967406380027739e-06, "loss": 0.0117, "step": 7267 }, { "epoch": 5.034984412885348, "grad_norm": 0.3413487672805786, "learning_rate": 4.966712898751734e-06, "loss": 0.0073, "step": 7268 }, { "epoch": 5.035677173536543, "grad_norm": 0.26364386081695557, "learning_rate": 4.9660194174757284e-06, "loss": 0.0074, "step": 7269 }, { "epoch": 5.036369934187738, "grad_norm": 0.3675404191017151, "learning_rate": 4.965325936199723e-06, "loss": 0.0088, "step": 7270 }, { "epoch": 5.037062694838933, "grad_norm": 0.36805999279022217, "learning_rate": 4.9646324549237174e-06, "loss": 0.0078, "step": 7271 }, { "epoch": 5.037755455490128, "grad_norm": 0.33098238706588745, "learning_rate": 4.9639389736477115e-06, "loss": 0.0093, "step": 7272 }, { "epoch": 5.038448216141323, "grad_norm": 0.2958826720714569, "learning_rate": 4.9632454923717065e-06, "loss": 0.0083, "step": 7273 }, { "epoch": 5.039140976792519, "grad_norm": 0.3946586847305298, "learning_rate": 4.9625520110957006e-06, "loss": 0.0106, "step": 7274 }, { "epoch": 5.039833737443713, "grad_norm": 0.3046253025531769, "learning_rate": 4.9618585298196955e-06, "loss": 0.0079, "step": 7275 }, { "epoch": 5.040526498094908, "grad_norm": 0.4130062162876129, "learning_rate": 4.9611650485436896e-06, "loss": 0.011, "step": 7276 }, { "epoch": 5.0412192587461035, "grad_norm": 0.24970969557762146, "learning_rate": 4.960471567267684e-06, "loss": 0.0057, "step": 7277 }, { "epoch": 5.041912019397298, "grad_norm": 0.29134806990623474, "learning_rate": 4.959778085991679e-06, "loss": 0.0079, "step": 7278 }, { "epoch": 5.042604780048493, "grad_norm": 0.2696983814239502, "learning_rate": 4.9590846047156735e-06, "loss": 0.0074, "step": 7279 }, { "epoch": 5.0432975406996885, "grad_norm": 0.22379395365715027, "learning_rate": 4.958391123439668e-06, "loss": 0.0059, "step": 7280 }, { "epoch": 5.043990301350883, "grad_norm": 0.5553340315818787, "learning_rate": 4.957697642163662e-06, "loss": 0.0077, "step": 7281 }, { "epoch": 5.044683062002078, "grad_norm": 0.46020668745040894, "learning_rate": 4.957004160887657e-06, "loss": 0.0088, "step": 7282 }, { "epoch": 5.0453758226532734, "grad_norm": 0.2178874909877777, "learning_rate": 4.956310679611651e-06, "loss": 0.0041, "step": 7283 }, { "epoch": 5.046068583304469, "grad_norm": 0.41293787956237793, "learning_rate": 4.955617198335646e-06, "loss": 0.0074, "step": 7284 }, { "epoch": 5.046761343955663, "grad_norm": 0.33358272910118103, "learning_rate": 4.95492371705964e-06, "loss": 0.0075, "step": 7285 }, { "epoch": 5.047454104606858, "grad_norm": 0.33142977952957153, "learning_rate": 4.954230235783634e-06, "loss": 0.0056, "step": 7286 }, { "epoch": 5.048146865258054, "grad_norm": 0.3636792004108429, "learning_rate": 4.953536754507629e-06, "loss": 0.0071, "step": 7287 }, { "epoch": 5.048839625909248, "grad_norm": 0.24365943670272827, "learning_rate": 4.952843273231624e-06, "loss": 0.0046, "step": 7288 }, { "epoch": 5.049532386560443, "grad_norm": 0.2750144600868225, "learning_rate": 4.952149791955618e-06, "loss": 0.0071, "step": 7289 }, { "epoch": 5.050225147211639, "grad_norm": 0.2605492174625397, "learning_rate": 4.951456310679612e-06, "loss": 0.005, "step": 7290 }, { "epoch": 5.050917907862833, "grad_norm": 0.2993355989456177, "learning_rate": 4.950762829403607e-06, "loss": 0.0067, "step": 7291 }, { "epoch": 5.051610668514028, "grad_norm": 0.4299032986164093, "learning_rate": 4.950069348127601e-06, "loss": 0.007, "step": 7292 }, { "epoch": 5.052303429165224, "grad_norm": 0.3229838013648987, "learning_rate": 4.949375866851596e-06, "loss": 0.0062, "step": 7293 }, { "epoch": 5.052996189816419, "grad_norm": 0.3593423366546631, "learning_rate": 4.94868238557559e-06, "loss": 0.006, "step": 7294 }, { "epoch": 5.053688950467613, "grad_norm": 0.40031617879867554, "learning_rate": 4.947988904299584e-06, "loss": 0.0054, "step": 7295 }, { "epoch": 5.0543817111188085, "grad_norm": 0.3411800265312195, "learning_rate": 4.947295423023579e-06, "loss": 0.0082, "step": 7296 }, { "epoch": 5.055074471770004, "grad_norm": 0.37931033968925476, "learning_rate": 4.946601941747573e-06, "loss": 0.0069, "step": 7297 }, { "epoch": 5.055767232421198, "grad_norm": 0.290792316198349, "learning_rate": 4.945908460471568e-06, "loss": 0.0085, "step": 7298 }, { "epoch": 5.0564599930723935, "grad_norm": 0.23591336607933044, "learning_rate": 4.945214979195562e-06, "loss": 0.0064, "step": 7299 }, { "epoch": 5.057152753723589, "grad_norm": 0.25219523906707764, "learning_rate": 4.944521497919557e-06, "loss": 0.0056, "step": 7300 }, { "epoch": 5.057845514374783, "grad_norm": 0.18537768721580505, "learning_rate": 4.943828016643551e-06, "loss": 0.0054, "step": 7301 }, { "epoch": 5.058538275025978, "grad_norm": 0.3577527105808258, "learning_rate": 4.943134535367545e-06, "loss": 0.0099, "step": 7302 }, { "epoch": 5.059231035677174, "grad_norm": 0.3047337234020233, "learning_rate": 4.94244105409154e-06, "loss": 0.0072, "step": 7303 }, { "epoch": 5.059923796328369, "grad_norm": 0.13182613253593445, "learning_rate": 4.941747572815534e-06, "loss": 0.0033, "step": 7304 }, { "epoch": 5.060616556979563, "grad_norm": 0.30081212520599365, "learning_rate": 4.941054091539529e-06, "loss": 0.0071, "step": 7305 }, { "epoch": 5.061309317630759, "grad_norm": 0.23153644800186157, "learning_rate": 4.940360610263523e-06, "loss": 0.0059, "step": 7306 }, { "epoch": 5.062002078281954, "grad_norm": 0.2580624222755432, "learning_rate": 4.939667128987517e-06, "loss": 0.0071, "step": 7307 }, { "epoch": 5.062694838933148, "grad_norm": 0.22963561117649078, "learning_rate": 4.938973647711512e-06, "loss": 0.0051, "step": 7308 }, { "epoch": 5.063387599584344, "grad_norm": 0.23751002550125122, "learning_rate": 4.938280166435507e-06, "loss": 0.0058, "step": 7309 }, { "epoch": 5.064080360235539, "grad_norm": 0.2023371160030365, "learning_rate": 4.937586685159501e-06, "loss": 0.0055, "step": 7310 }, { "epoch": 5.064773120886733, "grad_norm": 0.2542552053928375, "learning_rate": 4.936893203883495e-06, "loss": 0.0066, "step": 7311 }, { "epoch": 5.0654658815379285, "grad_norm": 0.4164277911186218, "learning_rate": 4.936199722607489e-06, "loss": 0.0086, "step": 7312 }, { "epoch": 5.066158642189124, "grad_norm": 0.5117143392562866, "learning_rate": 4.935506241331484e-06, "loss": 0.0076, "step": 7313 }, { "epoch": 5.066851402840319, "grad_norm": 0.23055152595043182, "learning_rate": 4.934812760055479e-06, "loss": 0.0056, "step": 7314 }, { "epoch": 5.0675441634915135, "grad_norm": 0.351272851228714, "learning_rate": 4.934119278779473e-06, "loss": 0.0081, "step": 7315 }, { "epoch": 5.068236924142709, "grad_norm": 0.38055822253227234, "learning_rate": 4.9334257975034674e-06, "loss": 0.0077, "step": 7316 }, { "epoch": 5.068929684793904, "grad_norm": 0.38499513268470764, "learning_rate": 4.932732316227462e-06, "loss": 0.0076, "step": 7317 }, { "epoch": 5.069622445445098, "grad_norm": 0.23655486106872559, "learning_rate": 4.932038834951457e-06, "loss": 0.0049, "step": 7318 }, { "epoch": 5.070315206096294, "grad_norm": 0.25031062960624695, "learning_rate": 4.931345353675451e-06, "loss": 0.0085, "step": 7319 }, { "epoch": 5.071007966747489, "grad_norm": 0.9640106558799744, "learning_rate": 4.9306518723994455e-06, "loss": 0.0098, "step": 7320 }, { "epoch": 5.071700727398683, "grad_norm": 0.44875720143318176, "learning_rate": 4.92995839112344e-06, "loss": 0.0054, "step": 7321 }, { "epoch": 5.072393488049879, "grad_norm": 0.31168317794799805, "learning_rate": 4.9292649098474345e-06, "loss": 0.0065, "step": 7322 }, { "epoch": 5.073086248701074, "grad_norm": 0.4042441248893738, "learning_rate": 4.928571428571429e-06, "loss": 0.0057, "step": 7323 }, { "epoch": 5.073779009352269, "grad_norm": 0.28621089458465576, "learning_rate": 4.9278779472954235e-06, "loss": 0.0077, "step": 7324 }, { "epoch": 5.074471770003464, "grad_norm": 0.33940061926841736, "learning_rate": 4.927184466019418e-06, "loss": 0.0077, "step": 7325 }, { "epoch": 5.075164530654659, "grad_norm": 0.41262534260749817, "learning_rate": 4.9264909847434125e-06, "loss": 0.0102, "step": 7326 }, { "epoch": 5.075857291305854, "grad_norm": 0.23722289502620697, "learning_rate": 4.925797503467407e-06, "loss": 0.0053, "step": 7327 }, { "epoch": 5.076550051957049, "grad_norm": 0.3129745125770569, "learning_rate": 4.9251040221914015e-06, "loss": 0.0067, "step": 7328 }, { "epoch": 5.077242812608244, "grad_norm": 0.27803748846054077, "learning_rate": 4.924410540915396e-06, "loss": 0.0072, "step": 7329 }, { "epoch": 5.077935573259439, "grad_norm": 0.29906079173088074, "learning_rate": 4.9237170596393906e-06, "loss": 0.0064, "step": 7330 }, { "epoch": 5.0786283339106335, "grad_norm": 0.37967410683631897, "learning_rate": 4.923023578363385e-06, "loss": 0.0087, "step": 7331 }, { "epoch": 5.079321094561829, "grad_norm": 0.1984589397907257, "learning_rate": 4.922330097087379e-06, "loss": 0.0047, "step": 7332 }, { "epoch": 5.080013855213024, "grad_norm": 0.3248573839664459, "learning_rate": 4.921636615811374e-06, "loss": 0.0072, "step": 7333 }, { "epoch": 5.080706615864219, "grad_norm": 0.22171147167682648, "learning_rate": 4.920943134535368e-06, "loss": 0.0051, "step": 7334 }, { "epoch": 5.081399376515414, "grad_norm": 0.2765241265296936, "learning_rate": 4.920249653259363e-06, "loss": 0.006, "step": 7335 }, { "epoch": 5.082092137166609, "grad_norm": 0.40976372361183167, "learning_rate": 4.919556171983357e-06, "loss": 0.0089, "step": 7336 }, { "epoch": 5.082784897817804, "grad_norm": 0.3001773953437805, "learning_rate": 4.918862690707351e-06, "loss": 0.0058, "step": 7337 }, { "epoch": 5.083477658468999, "grad_norm": 0.29388663172721863, "learning_rate": 4.918169209431346e-06, "loss": 0.0087, "step": 7338 }, { "epoch": 5.084170419120194, "grad_norm": 0.3032190799713135, "learning_rate": 4.917475728155341e-06, "loss": 0.0054, "step": 7339 }, { "epoch": 5.084863179771389, "grad_norm": 0.32250088453292847, "learning_rate": 4.916782246879335e-06, "loss": 0.0073, "step": 7340 }, { "epoch": 5.085555940422584, "grad_norm": 0.3910949230194092, "learning_rate": 4.916088765603329e-06, "loss": 0.0084, "step": 7341 }, { "epoch": 5.086248701073779, "grad_norm": 0.3047500550746918, "learning_rate": 4.915395284327323e-06, "loss": 0.01, "step": 7342 }, { "epoch": 5.086941461724974, "grad_norm": 0.31394195556640625, "learning_rate": 4.914701803051318e-06, "loss": 0.006, "step": 7343 }, { "epoch": 5.0876342223761695, "grad_norm": 0.3759842813014984, "learning_rate": 4.914008321775313e-06, "loss": 0.0063, "step": 7344 }, { "epoch": 5.088326983027364, "grad_norm": 0.3286186158657074, "learning_rate": 4.913314840499307e-06, "loss": 0.0058, "step": 7345 }, { "epoch": 5.089019743678559, "grad_norm": 0.2560966908931732, "learning_rate": 4.912621359223301e-06, "loss": 0.008, "step": 7346 }, { "epoch": 5.089712504329754, "grad_norm": 0.2549499571323395, "learning_rate": 4.911927877947296e-06, "loss": 0.0066, "step": 7347 }, { "epoch": 5.090405264980949, "grad_norm": 0.2322193682193756, "learning_rate": 4.911234396671291e-06, "loss": 0.0055, "step": 7348 }, { "epoch": 5.091098025632144, "grad_norm": 0.3046237826347351, "learning_rate": 4.910540915395285e-06, "loss": 0.0069, "step": 7349 }, { "epoch": 5.091790786283339, "grad_norm": 0.45353612303733826, "learning_rate": 4.909847434119279e-06, "loss": 0.0058, "step": 7350 }, { "epoch": 5.092483546934534, "grad_norm": 0.2574712634086609, "learning_rate": 4.909153952843273e-06, "loss": 0.0065, "step": 7351 }, { "epoch": 5.093176307585729, "grad_norm": 0.4106120765209198, "learning_rate": 4.908460471567268e-06, "loss": 0.0073, "step": 7352 }, { "epoch": 5.093869068236924, "grad_norm": 0.416460782289505, "learning_rate": 4.907766990291263e-06, "loss": 0.0057, "step": 7353 }, { "epoch": 5.09456182888812, "grad_norm": 0.30797287821769714, "learning_rate": 4.907073509015257e-06, "loss": 0.0071, "step": 7354 }, { "epoch": 5.095254589539314, "grad_norm": 0.4874429404735565, "learning_rate": 4.906380027739251e-06, "loss": 0.0113, "step": 7355 }, { "epoch": 5.095947350190509, "grad_norm": 0.2572321593761444, "learning_rate": 4.905686546463246e-06, "loss": 0.005, "step": 7356 }, { "epoch": 5.0966401108417045, "grad_norm": 0.24332685768604279, "learning_rate": 4.90499306518724e-06, "loss": 0.0061, "step": 7357 }, { "epoch": 5.097332871492899, "grad_norm": 0.3736319839954376, "learning_rate": 4.904299583911235e-06, "loss": 0.0064, "step": 7358 }, { "epoch": 5.098025632144094, "grad_norm": 0.4131574332714081, "learning_rate": 4.903606102635229e-06, "loss": 0.0063, "step": 7359 }, { "epoch": 5.0987183927952895, "grad_norm": 0.412183940410614, "learning_rate": 4.902912621359223e-06, "loss": 0.0122, "step": 7360 }, { "epoch": 5.099411153446484, "grad_norm": 0.25543296337127686, "learning_rate": 4.902219140083218e-06, "loss": 0.0066, "step": 7361 }, { "epoch": 5.100103914097679, "grad_norm": 0.25338611006736755, "learning_rate": 4.901525658807212e-06, "loss": 0.005, "step": 7362 }, { "epoch": 5.100796674748874, "grad_norm": 0.29101377725601196, "learning_rate": 4.900832177531207e-06, "loss": 0.0065, "step": 7363 }, { "epoch": 5.101489435400069, "grad_norm": 0.25856027007102966, "learning_rate": 4.900138696255201e-06, "loss": 0.0047, "step": 7364 }, { "epoch": 5.102182196051264, "grad_norm": 0.2946327030658722, "learning_rate": 4.899445214979196e-06, "loss": 0.0058, "step": 7365 }, { "epoch": 5.102874956702459, "grad_norm": 0.25694847106933594, "learning_rate": 4.89875173370319e-06, "loss": 0.0046, "step": 7366 }, { "epoch": 5.103567717353655, "grad_norm": 0.308318167924881, "learning_rate": 4.8980582524271845e-06, "loss": 0.0065, "step": 7367 }, { "epoch": 5.104260478004849, "grad_norm": 0.33156275749206543, "learning_rate": 4.897364771151179e-06, "loss": 0.007, "step": 7368 }, { "epoch": 5.104953238656044, "grad_norm": 0.19715631008148193, "learning_rate": 4.896671289875174e-06, "loss": 0.0041, "step": 7369 }, { "epoch": 5.10564599930724, "grad_norm": 0.328476220369339, "learning_rate": 4.895977808599168e-06, "loss": 0.0065, "step": 7370 }, { "epoch": 5.106338759958434, "grad_norm": 0.40526121854782104, "learning_rate": 4.8952843273231625e-06, "loss": 0.0087, "step": 7371 }, { "epoch": 5.107031520609629, "grad_norm": 0.4126852750778198, "learning_rate": 4.894590846047157e-06, "loss": 0.0056, "step": 7372 }, { "epoch": 5.107724281260825, "grad_norm": 0.2388870120048523, "learning_rate": 4.8938973647711515e-06, "loss": 0.0046, "step": 7373 }, { "epoch": 5.10841704191202, "grad_norm": 0.31237688660621643, "learning_rate": 4.8932038834951465e-06, "loss": 0.0081, "step": 7374 }, { "epoch": 5.109109802563214, "grad_norm": 0.2459273785352707, "learning_rate": 4.8925104022191405e-06, "loss": 0.0058, "step": 7375 }, { "epoch": 5.1098025632144095, "grad_norm": 0.31756120920181274, "learning_rate": 4.891816920943135e-06, "loss": 0.0071, "step": 7376 }, { "epoch": 5.110495323865605, "grad_norm": 0.26058638095855713, "learning_rate": 4.8911234396671296e-06, "loss": 0.0066, "step": 7377 }, { "epoch": 5.111188084516799, "grad_norm": 0.4566001892089844, "learning_rate": 4.8904299583911245e-06, "loss": 0.0057, "step": 7378 }, { "epoch": 5.1118808451679945, "grad_norm": 0.25763601064682007, "learning_rate": 4.889736477115119e-06, "loss": 0.0067, "step": 7379 }, { "epoch": 5.11257360581919, "grad_norm": 0.27179789543151855, "learning_rate": 4.889042995839113e-06, "loss": 0.0068, "step": 7380 }, { "epoch": 5.113266366470384, "grad_norm": 0.3337440490722656, "learning_rate": 4.888349514563107e-06, "loss": 0.0068, "step": 7381 }, { "epoch": 5.113959127121579, "grad_norm": 0.30211561918258667, "learning_rate": 4.887656033287102e-06, "loss": 0.0049, "step": 7382 }, { "epoch": 5.114651887772775, "grad_norm": 0.30484941601753235, "learning_rate": 4.886962552011097e-06, "loss": 0.0085, "step": 7383 }, { "epoch": 5.115344648423969, "grad_norm": 0.26404300332069397, "learning_rate": 4.886269070735091e-06, "loss": 0.0072, "step": 7384 }, { "epoch": 5.116037409075164, "grad_norm": 0.26851800084114075, "learning_rate": 4.885575589459085e-06, "loss": 0.0066, "step": 7385 }, { "epoch": 5.11673016972636, "grad_norm": 0.1804685890674591, "learning_rate": 4.88488210818308e-06, "loss": 0.0047, "step": 7386 }, { "epoch": 5.117422930377555, "grad_norm": 0.40278634428977966, "learning_rate": 4.884188626907074e-06, "loss": 0.0074, "step": 7387 }, { "epoch": 5.118115691028749, "grad_norm": 0.316924124956131, "learning_rate": 4.883495145631069e-06, "loss": 0.0098, "step": 7388 }, { "epoch": 5.118808451679945, "grad_norm": 0.3233623504638672, "learning_rate": 4.882801664355063e-06, "loss": 0.0059, "step": 7389 }, { "epoch": 5.11950121233114, "grad_norm": 0.8516749143600464, "learning_rate": 4.882108183079057e-06, "loss": 0.0052, "step": 7390 }, { "epoch": 5.120193972982334, "grad_norm": 0.3691239655017853, "learning_rate": 4.881414701803052e-06, "loss": 0.0086, "step": 7391 }, { "epoch": 5.1208867336335295, "grad_norm": 0.25633835792541504, "learning_rate": 4.880721220527046e-06, "loss": 0.005, "step": 7392 }, { "epoch": 5.121579494284725, "grad_norm": 0.2983531057834625, "learning_rate": 4.880027739251041e-06, "loss": 0.0058, "step": 7393 }, { "epoch": 5.12227225493592, "grad_norm": 0.3667583465576172, "learning_rate": 4.879334257975035e-06, "loss": 0.006, "step": 7394 }, { "epoch": 5.1229650155871145, "grad_norm": 0.27222368121147156, "learning_rate": 4.87864077669903e-06, "loss": 0.009, "step": 7395 }, { "epoch": 5.12365777623831, "grad_norm": 0.3674112856388092, "learning_rate": 4.877947295423024e-06, "loss": 0.0061, "step": 7396 }, { "epoch": 5.124350536889505, "grad_norm": 0.4103373885154724, "learning_rate": 4.877253814147018e-06, "loss": 0.0075, "step": 7397 }, { "epoch": 5.125043297540699, "grad_norm": 0.44219517707824707, "learning_rate": 4.876560332871013e-06, "loss": 0.0061, "step": 7398 }, { "epoch": 5.125736058191895, "grad_norm": 0.42452719807624817, "learning_rate": 4.875866851595007e-06, "loss": 0.0078, "step": 7399 }, { "epoch": 5.12642881884309, "grad_norm": 0.28701063990592957, "learning_rate": 4.875173370319002e-06, "loss": 0.0068, "step": 7400 }, { "epoch": 5.127121579494284, "grad_norm": 0.28020837903022766, "learning_rate": 4.874479889042996e-06, "loss": 0.0059, "step": 7401 }, { "epoch": 5.12781434014548, "grad_norm": 0.44853636622428894, "learning_rate": 4.87378640776699e-06, "loss": 0.0093, "step": 7402 }, { "epoch": 5.128507100796675, "grad_norm": 0.34930720925331116, "learning_rate": 4.873092926490985e-06, "loss": 0.0066, "step": 7403 }, { "epoch": 5.129199861447869, "grad_norm": 0.501854658126831, "learning_rate": 4.87239944521498e-06, "loss": 0.0056, "step": 7404 }, { "epoch": 5.129892622099065, "grad_norm": 0.2529129087924957, "learning_rate": 4.871705963938974e-06, "loss": 0.0055, "step": 7405 }, { "epoch": 5.13058538275026, "grad_norm": 0.7160843014717102, "learning_rate": 4.871012482662968e-06, "loss": 0.0091, "step": 7406 }, { "epoch": 5.131278143401455, "grad_norm": 0.39088577032089233, "learning_rate": 4.870319001386963e-06, "loss": 0.0114, "step": 7407 }, { "epoch": 5.1319709040526496, "grad_norm": 0.43904075026512146, "learning_rate": 4.869625520110957e-06, "loss": 0.0084, "step": 7408 }, { "epoch": 5.132663664703845, "grad_norm": 0.2854726016521454, "learning_rate": 4.868932038834952e-06, "loss": 0.0082, "step": 7409 }, { "epoch": 5.13335642535504, "grad_norm": 0.233002707362175, "learning_rate": 4.868238557558946e-06, "loss": 0.007, "step": 7410 }, { "epoch": 5.1340491860062345, "grad_norm": 0.3564295172691345, "learning_rate": 4.86754507628294e-06, "loss": 0.0083, "step": 7411 }, { "epoch": 5.13474194665743, "grad_norm": 0.32407668232917786, "learning_rate": 4.866851595006935e-06, "loss": 0.0071, "step": 7412 }, { "epoch": 5.135434707308625, "grad_norm": 0.24932849407196045, "learning_rate": 4.866158113730929e-06, "loss": 0.0072, "step": 7413 }, { "epoch": 5.13612746795982, "grad_norm": 0.35719776153564453, "learning_rate": 4.865464632454924e-06, "loss": 0.0078, "step": 7414 }, { "epoch": 5.136820228611015, "grad_norm": 0.36703062057495117, "learning_rate": 4.864771151178918e-06, "loss": 0.0098, "step": 7415 }, { "epoch": 5.13751298926221, "grad_norm": 0.3459904193878174, "learning_rate": 4.864077669902913e-06, "loss": 0.0075, "step": 7416 }, { "epoch": 5.138205749913405, "grad_norm": 0.3044803738594055, "learning_rate": 4.863384188626907e-06, "loss": 0.0075, "step": 7417 }, { "epoch": 5.1388985105646, "grad_norm": 0.3431564271450043, "learning_rate": 4.8626907073509015e-06, "loss": 0.008, "step": 7418 }, { "epoch": 5.139591271215795, "grad_norm": 0.3503017723560333, "learning_rate": 4.8619972260748964e-06, "loss": 0.0103, "step": 7419 }, { "epoch": 5.14028403186699, "grad_norm": 0.3378048241138458, "learning_rate": 4.8613037447988905e-06, "loss": 0.0053, "step": 7420 }, { "epoch": 5.140976792518185, "grad_norm": 0.22293899953365326, "learning_rate": 4.8606102635228855e-06, "loss": 0.0051, "step": 7421 }, { "epoch": 5.14166955316938, "grad_norm": 0.251714289188385, "learning_rate": 4.8599167822468795e-06, "loss": 0.0057, "step": 7422 }, { "epoch": 5.142362313820575, "grad_norm": 0.2762012183666229, "learning_rate": 4.859223300970874e-06, "loss": 0.0078, "step": 7423 }, { "epoch": 5.14305507447177, "grad_norm": 0.41653135418891907, "learning_rate": 4.8585298196948686e-06, "loss": 0.0088, "step": 7424 }, { "epoch": 5.143747835122965, "grad_norm": 0.42686501145362854, "learning_rate": 4.8578363384188635e-06, "loss": 0.0107, "step": 7425 }, { "epoch": 5.14444059577416, "grad_norm": 0.2930285334587097, "learning_rate": 4.857142857142858e-06, "loss": 0.0066, "step": 7426 }, { "epoch": 5.145133356425355, "grad_norm": 0.45484989881515503, "learning_rate": 4.856449375866852e-06, "loss": 0.005, "step": 7427 }, { "epoch": 5.14582611707655, "grad_norm": 0.412971556186676, "learning_rate": 4.855755894590846e-06, "loss": 0.0083, "step": 7428 }, { "epoch": 5.146518877727745, "grad_norm": 0.3098788857460022, "learning_rate": 4.855062413314841e-06, "loss": 0.0064, "step": 7429 }, { "epoch": 5.14721163837894, "grad_norm": 0.29858165979385376, "learning_rate": 4.854368932038836e-06, "loss": 0.0055, "step": 7430 }, { "epoch": 5.147904399030135, "grad_norm": 0.30740079283714294, "learning_rate": 4.85367545076283e-06, "loss": 0.0083, "step": 7431 }, { "epoch": 5.14859715968133, "grad_norm": 0.29849037528038025, "learning_rate": 4.852981969486824e-06, "loss": 0.0064, "step": 7432 }, { "epoch": 5.149289920332525, "grad_norm": 0.6315860748291016, "learning_rate": 4.852288488210819e-06, "loss": 0.0069, "step": 7433 }, { "epoch": 5.14998268098372, "grad_norm": 0.40717795491218567, "learning_rate": 4.851595006934814e-06, "loss": 0.006, "step": 7434 }, { "epoch": 5.150675441634915, "grad_norm": 0.3300587236881256, "learning_rate": 4.850901525658808e-06, "loss": 0.0073, "step": 7435 }, { "epoch": 5.15136820228611, "grad_norm": 0.26116156578063965, "learning_rate": 4.850208044382802e-06, "loss": 0.0056, "step": 7436 }, { "epoch": 5.1520609629373055, "grad_norm": 0.27881118655204773, "learning_rate": 4.849514563106797e-06, "loss": 0.0056, "step": 7437 }, { "epoch": 5.1527537235885, "grad_norm": 0.46491703391075134, "learning_rate": 4.848821081830791e-06, "loss": 0.0068, "step": 7438 }, { "epoch": 5.153446484239695, "grad_norm": 0.3274855315685272, "learning_rate": 4.848127600554786e-06, "loss": 0.0102, "step": 7439 }, { "epoch": 5.1541392448908905, "grad_norm": 0.36323603987693787, "learning_rate": 4.84743411927878e-06, "loss": 0.0069, "step": 7440 }, { "epoch": 5.154832005542085, "grad_norm": 1.0078736543655396, "learning_rate": 4.846740638002774e-06, "loss": 0.0106, "step": 7441 }, { "epoch": 5.15552476619328, "grad_norm": 0.3542138934135437, "learning_rate": 4.846047156726769e-06, "loss": 0.006, "step": 7442 }, { "epoch": 5.156217526844475, "grad_norm": 0.3060283958911896, "learning_rate": 4.845353675450763e-06, "loss": 0.005, "step": 7443 }, { "epoch": 5.15691028749567, "grad_norm": 0.39142200350761414, "learning_rate": 4.844660194174758e-06, "loss": 0.007, "step": 7444 }, { "epoch": 5.157603048146865, "grad_norm": 0.4000868499279022, "learning_rate": 4.843966712898752e-06, "loss": 0.0062, "step": 7445 }, { "epoch": 5.15829580879806, "grad_norm": 0.3588486313819885, "learning_rate": 4.843273231622747e-06, "loss": 0.0104, "step": 7446 }, { "epoch": 5.158988569449256, "grad_norm": 0.26281294226646423, "learning_rate": 4.842579750346741e-06, "loss": 0.0049, "step": 7447 }, { "epoch": 5.15968133010045, "grad_norm": 0.27854153513908386, "learning_rate": 4.841886269070735e-06, "loss": 0.0047, "step": 7448 }, { "epoch": 5.160374090751645, "grad_norm": 0.34578070044517517, "learning_rate": 4.84119278779473e-06, "loss": 0.0106, "step": 7449 }, { "epoch": 5.161066851402841, "grad_norm": 0.3037815988063812, "learning_rate": 4.840499306518724e-06, "loss": 0.0057, "step": 7450 }, { "epoch": 5.161759612054035, "grad_norm": 0.30182409286499023, "learning_rate": 4.839805825242719e-06, "loss": 0.0087, "step": 7451 }, { "epoch": 5.16245237270523, "grad_norm": 0.277145117521286, "learning_rate": 4.839112343966713e-06, "loss": 0.0053, "step": 7452 }, { "epoch": 5.1631451333564256, "grad_norm": 0.2836417257785797, "learning_rate": 4.838418862690707e-06, "loss": 0.0059, "step": 7453 }, { "epoch": 5.16383789400762, "grad_norm": 0.4726655185222626, "learning_rate": 4.837725381414702e-06, "loss": 0.0079, "step": 7454 }, { "epoch": 5.164530654658815, "grad_norm": 0.33091259002685547, "learning_rate": 4.837031900138697e-06, "loss": 0.0061, "step": 7455 }, { "epoch": 5.1652234153100105, "grad_norm": 0.3957972228527069, "learning_rate": 4.836338418862691e-06, "loss": 0.0085, "step": 7456 }, { "epoch": 5.165916175961206, "grad_norm": 0.42114585638046265, "learning_rate": 4.835644937586685e-06, "loss": 0.0102, "step": 7457 }, { "epoch": 5.1666089366124, "grad_norm": 0.33106687664985657, "learning_rate": 4.834951456310679e-06, "loss": 0.007, "step": 7458 }, { "epoch": 5.1673016972635955, "grad_norm": 0.28504499793052673, "learning_rate": 4.834257975034674e-06, "loss": 0.0063, "step": 7459 }, { "epoch": 5.167994457914791, "grad_norm": 0.3100307881832123, "learning_rate": 4.833564493758669e-06, "loss": 0.009, "step": 7460 }, { "epoch": 5.168687218565985, "grad_norm": 0.5789116621017456, "learning_rate": 4.832871012482663e-06, "loss": 0.007, "step": 7461 }, { "epoch": 5.16937997921718, "grad_norm": 0.25984102487564087, "learning_rate": 4.832177531206657e-06, "loss": 0.0045, "step": 7462 }, { "epoch": 5.170072739868376, "grad_norm": 0.46192842721939087, "learning_rate": 4.831484049930652e-06, "loss": 0.0085, "step": 7463 }, { "epoch": 5.17076550051957, "grad_norm": 0.48657897114753723, "learning_rate": 4.830790568654647e-06, "loss": 0.0084, "step": 7464 }, { "epoch": 5.171458261170765, "grad_norm": 0.23726239800453186, "learning_rate": 4.830097087378641e-06, "loss": 0.0046, "step": 7465 }, { "epoch": 5.172151021821961, "grad_norm": 0.37569645047187805, "learning_rate": 4.8294036061026354e-06, "loss": 0.0085, "step": 7466 }, { "epoch": 5.172843782473156, "grad_norm": 0.3211580514907837, "learning_rate": 4.8287101248266295e-06, "loss": 0.0059, "step": 7467 }, { "epoch": 5.17353654312435, "grad_norm": 0.24997110664844513, "learning_rate": 4.8280166435506245e-06, "loss": 0.0057, "step": 7468 }, { "epoch": 5.174229303775546, "grad_norm": 0.28084564208984375, "learning_rate": 4.827323162274619e-06, "loss": 0.0089, "step": 7469 }, { "epoch": 5.174922064426741, "grad_norm": 0.36999210715293884, "learning_rate": 4.8266296809986135e-06, "loss": 0.0079, "step": 7470 }, { "epoch": 5.175614825077935, "grad_norm": 0.36330100893974304, "learning_rate": 4.8259361997226076e-06, "loss": 0.0078, "step": 7471 }, { "epoch": 5.1763075857291305, "grad_norm": 0.45394986867904663, "learning_rate": 4.8252427184466025e-06, "loss": 0.0064, "step": 7472 }, { "epoch": 5.177000346380326, "grad_norm": 0.42422235012054443, "learning_rate": 4.824549237170597e-06, "loss": 0.0091, "step": 7473 }, { "epoch": 5.17769310703152, "grad_norm": 0.21235227584838867, "learning_rate": 4.8238557558945915e-06, "loss": 0.0048, "step": 7474 }, { "epoch": 5.1783858676827155, "grad_norm": 0.42402246594429016, "learning_rate": 4.823162274618586e-06, "loss": 0.0055, "step": 7475 }, { "epoch": 5.179078628333911, "grad_norm": 0.33939579129219055, "learning_rate": 4.82246879334258e-06, "loss": 0.0066, "step": 7476 }, { "epoch": 5.179771388985106, "grad_norm": 0.3085120916366577, "learning_rate": 4.821775312066575e-06, "loss": 0.0068, "step": 7477 }, { "epoch": 5.1804641496363, "grad_norm": 0.4421272575855255, "learning_rate": 4.821081830790569e-06, "loss": 0.0111, "step": 7478 }, { "epoch": 5.181156910287496, "grad_norm": 0.3783268630504608, "learning_rate": 4.820388349514564e-06, "loss": 0.0073, "step": 7479 }, { "epoch": 5.181849670938691, "grad_norm": 0.2809380292892456, "learning_rate": 4.819694868238558e-06, "loss": 0.0078, "step": 7480 }, { "epoch": 5.182542431589885, "grad_norm": 0.31724241375923157, "learning_rate": 4.819001386962553e-06, "loss": 0.0079, "step": 7481 }, { "epoch": 5.183235192241081, "grad_norm": 0.5670633912086487, "learning_rate": 4.818307905686547e-06, "loss": 0.0049, "step": 7482 }, { "epoch": 5.183927952892276, "grad_norm": 0.27482205629348755, "learning_rate": 4.817614424410541e-06, "loss": 0.006, "step": 7483 }, { "epoch": 5.18462071354347, "grad_norm": 0.2535606622695923, "learning_rate": 4.816920943134536e-06, "loss": 0.0063, "step": 7484 }, { "epoch": 5.185313474194666, "grad_norm": 0.28620415925979614, "learning_rate": 4.816227461858531e-06, "loss": 0.0093, "step": 7485 }, { "epoch": 5.186006234845861, "grad_norm": 0.2894137501716614, "learning_rate": 4.815533980582525e-06, "loss": 0.0084, "step": 7486 }, { "epoch": 5.186698995497056, "grad_norm": 0.36168500781059265, "learning_rate": 4.814840499306519e-06, "loss": 0.0079, "step": 7487 }, { "epoch": 5.1873917561482505, "grad_norm": 0.2551228404045105, "learning_rate": 4.814147018030513e-06, "loss": 0.0069, "step": 7488 }, { "epoch": 5.188084516799446, "grad_norm": 0.29550832509994507, "learning_rate": 4.813453536754508e-06, "loss": 0.0086, "step": 7489 }, { "epoch": 5.188777277450641, "grad_norm": 0.3164914548397064, "learning_rate": 4.812760055478503e-06, "loss": 0.0077, "step": 7490 }, { "epoch": 5.1894700381018355, "grad_norm": 0.20070882141590118, "learning_rate": 4.812066574202497e-06, "loss": 0.0052, "step": 7491 }, { "epoch": 5.190162798753031, "grad_norm": 0.33653250336647034, "learning_rate": 4.811373092926491e-06, "loss": 0.007, "step": 7492 }, { "epoch": 5.190855559404226, "grad_norm": 0.38830628991127014, "learning_rate": 4.810679611650486e-06, "loss": 0.0091, "step": 7493 }, { "epoch": 5.19154832005542, "grad_norm": 0.25447553396224976, "learning_rate": 4.809986130374481e-06, "loss": 0.0056, "step": 7494 }, { "epoch": 5.192241080706616, "grad_norm": 0.356847882270813, "learning_rate": 4.809292649098475e-06, "loss": 0.0105, "step": 7495 }, { "epoch": 5.192933841357811, "grad_norm": 0.3971211910247803, "learning_rate": 4.808599167822469e-06, "loss": 0.0073, "step": 7496 }, { "epoch": 5.193626602009006, "grad_norm": 0.2789386212825775, "learning_rate": 4.807905686546463e-06, "loss": 0.0086, "step": 7497 }, { "epoch": 5.194319362660201, "grad_norm": 0.4150344431400299, "learning_rate": 4.807212205270458e-06, "loss": 0.0069, "step": 7498 }, { "epoch": 5.195012123311396, "grad_norm": 0.3591524660587311, "learning_rate": 4.806518723994453e-06, "loss": 0.0064, "step": 7499 }, { "epoch": 5.195704883962591, "grad_norm": 0.30835917592048645, "learning_rate": 4.805825242718447e-06, "loss": 0.007, "step": 7500 }, { "epoch": 5.196397644613786, "grad_norm": 0.36681753396987915, "learning_rate": 4.805131761442441e-06, "loss": 0.008, "step": 7501 }, { "epoch": 5.197090405264981, "grad_norm": 0.30755317211151123, "learning_rate": 4.804438280166436e-06, "loss": 0.0073, "step": 7502 }, { "epoch": 5.197783165916176, "grad_norm": 0.40762585401535034, "learning_rate": 4.80374479889043e-06, "loss": 0.0083, "step": 7503 }, { "epoch": 5.198475926567371, "grad_norm": 0.26317718625068665, "learning_rate": 4.803051317614425e-06, "loss": 0.0051, "step": 7504 }, { "epoch": 5.199168687218566, "grad_norm": 0.48002341389656067, "learning_rate": 4.802357836338419e-06, "loss": 0.0064, "step": 7505 }, { "epoch": 5.199861447869761, "grad_norm": 0.3856843411922455, "learning_rate": 4.801664355062413e-06, "loss": 0.0081, "step": 7506 }, { "epoch": 5.200554208520956, "grad_norm": 0.3101407289505005, "learning_rate": 4.800970873786408e-06, "loss": 0.0062, "step": 7507 }, { "epoch": 5.201246969172151, "grad_norm": 0.29151254892349243, "learning_rate": 4.800277392510402e-06, "loss": 0.0072, "step": 7508 }, { "epoch": 5.201939729823346, "grad_norm": 0.3234097361564636, "learning_rate": 4.799583911234397e-06, "loss": 0.0076, "step": 7509 }, { "epoch": 5.202632490474541, "grad_norm": 0.32400745153427124, "learning_rate": 4.798890429958391e-06, "loss": 0.0118, "step": 7510 }, { "epoch": 5.203325251125736, "grad_norm": 0.350826233625412, "learning_rate": 4.798196948682386e-06, "loss": 0.0098, "step": 7511 }, { "epoch": 5.204018011776931, "grad_norm": 0.2611273229122162, "learning_rate": 4.79750346740638e-06, "loss": 0.005, "step": 7512 }, { "epoch": 5.204710772428126, "grad_norm": 0.5315921306610107, "learning_rate": 4.7968099861303744e-06, "loss": 0.008, "step": 7513 }, { "epoch": 5.205403533079321, "grad_norm": 0.30067697167396545, "learning_rate": 4.796116504854369e-06, "loss": 0.0069, "step": 7514 }, { "epoch": 5.206096293730516, "grad_norm": 0.37230080366134644, "learning_rate": 4.7954230235783635e-06, "loss": 0.0065, "step": 7515 }, { "epoch": 5.206789054381711, "grad_norm": 0.36270418763160706, "learning_rate": 4.794729542302358e-06, "loss": 0.0084, "step": 7516 }, { "epoch": 5.2074818150329065, "grad_norm": 0.29917728900909424, "learning_rate": 4.7940360610263525e-06, "loss": 0.0063, "step": 7517 }, { "epoch": 5.208174575684101, "grad_norm": 0.3706039488315582, "learning_rate": 4.7933425797503466e-06, "loss": 0.0076, "step": 7518 }, { "epoch": 5.208867336335296, "grad_norm": 0.37487998604774475, "learning_rate": 4.7926490984743415e-06, "loss": 0.0068, "step": 7519 }, { "epoch": 5.2095600969864915, "grad_norm": 0.20690150558948517, "learning_rate": 4.7919556171983364e-06, "loss": 0.0046, "step": 7520 }, { "epoch": 5.210252857637686, "grad_norm": 0.340813010931015, "learning_rate": 4.7912621359223305e-06, "loss": 0.0071, "step": 7521 }, { "epoch": 5.210945618288881, "grad_norm": 0.19376729428768158, "learning_rate": 4.790568654646325e-06, "loss": 0.0038, "step": 7522 }, { "epoch": 5.211638378940076, "grad_norm": 0.3811071515083313, "learning_rate": 4.7898751733703195e-06, "loss": 0.0101, "step": 7523 }, { "epoch": 5.212331139591271, "grad_norm": 0.26559945940971375, "learning_rate": 4.789181692094314e-06, "loss": 0.0057, "step": 7524 }, { "epoch": 5.213023900242466, "grad_norm": 0.2858808636665344, "learning_rate": 4.7884882108183086e-06, "loss": 0.0064, "step": 7525 }, { "epoch": 5.213716660893661, "grad_norm": 0.29177382588386536, "learning_rate": 4.787794729542303e-06, "loss": 0.0077, "step": 7526 }, { "epoch": 5.214409421544857, "grad_norm": 0.20582586526870728, "learning_rate": 4.787101248266297e-06, "loss": 0.0059, "step": 7527 }, { "epoch": 5.215102182196051, "grad_norm": 0.26234903931617737, "learning_rate": 4.786407766990292e-06, "loss": 0.0061, "step": 7528 }, { "epoch": 5.215794942847246, "grad_norm": 0.32922959327697754, "learning_rate": 4.785714285714287e-06, "loss": 0.0084, "step": 7529 }, { "epoch": 5.216487703498442, "grad_norm": 0.3447861671447754, "learning_rate": 4.785020804438281e-06, "loss": 0.0073, "step": 7530 }, { "epoch": 5.217180464149636, "grad_norm": 0.5398739576339722, "learning_rate": 4.784327323162275e-06, "loss": 0.0053, "step": 7531 }, { "epoch": 5.217873224800831, "grad_norm": 0.3625837564468384, "learning_rate": 4.78363384188627e-06, "loss": 0.0108, "step": 7532 }, { "epoch": 5.2185659854520265, "grad_norm": 0.4039744436740875, "learning_rate": 4.782940360610264e-06, "loss": 0.0072, "step": 7533 }, { "epoch": 5.219258746103221, "grad_norm": 0.3538312315940857, "learning_rate": 4.782246879334259e-06, "loss": 0.0078, "step": 7534 }, { "epoch": 5.219951506754416, "grad_norm": 0.3611268103122711, "learning_rate": 4.781553398058253e-06, "loss": 0.007, "step": 7535 }, { "epoch": 5.2206442674056115, "grad_norm": 0.35851985216140747, "learning_rate": 4.780859916782247e-06, "loss": 0.0074, "step": 7536 }, { "epoch": 5.221337028056807, "grad_norm": 0.3169112205505371, "learning_rate": 4.780166435506242e-06, "loss": 0.0076, "step": 7537 }, { "epoch": 5.222029788708001, "grad_norm": 0.34461987018585205, "learning_rate": 4.779472954230236e-06, "loss": 0.0073, "step": 7538 }, { "epoch": 5.222722549359196, "grad_norm": 0.27043017745018005, "learning_rate": 4.778779472954231e-06, "loss": 0.0058, "step": 7539 }, { "epoch": 5.223415310010392, "grad_norm": 0.3345284163951874, "learning_rate": 4.778085991678225e-06, "loss": 0.0065, "step": 7540 }, { "epoch": 5.224108070661586, "grad_norm": 0.42769333720207214, "learning_rate": 4.77739251040222e-06, "loss": 0.0082, "step": 7541 }, { "epoch": 5.224800831312781, "grad_norm": 0.3785141706466675, "learning_rate": 4.776699029126214e-06, "loss": 0.0092, "step": 7542 }, { "epoch": 5.225493591963977, "grad_norm": 0.2181730568408966, "learning_rate": 4.776005547850208e-06, "loss": 0.0041, "step": 7543 }, { "epoch": 5.226186352615171, "grad_norm": 0.3941933214664459, "learning_rate": 4.775312066574203e-06, "loss": 0.0061, "step": 7544 }, { "epoch": 5.226879113266366, "grad_norm": 0.22325095534324646, "learning_rate": 4.774618585298197e-06, "loss": 0.0058, "step": 7545 }, { "epoch": 5.227571873917562, "grad_norm": 0.36726585030555725, "learning_rate": 4.773925104022192e-06, "loss": 0.0067, "step": 7546 }, { "epoch": 5.228264634568757, "grad_norm": 0.25020918250083923, "learning_rate": 4.773231622746186e-06, "loss": 0.0075, "step": 7547 }, { "epoch": 5.228957395219951, "grad_norm": 0.2851022779941559, "learning_rate": 4.77253814147018e-06, "loss": 0.0069, "step": 7548 }, { "epoch": 5.229650155871147, "grad_norm": 0.28374752402305603, "learning_rate": 4.771844660194175e-06, "loss": 0.0071, "step": 7549 }, { "epoch": 5.230342916522342, "grad_norm": 0.3818637728691101, "learning_rate": 4.77115117891817e-06, "loss": 0.0081, "step": 7550 }, { "epoch": 5.231035677173536, "grad_norm": 0.8067923784255981, "learning_rate": 4.770457697642164e-06, "loss": 0.0081, "step": 7551 }, { "epoch": 5.2317284378247315, "grad_norm": 0.4985780715942383, "learning_rate": 4.769764216366158e-06, "loss": 0.0086, "step": 7552 }, { "epoch": 5.232421198475927, "grad_norm": 0.3261982798576355, "learning_rate": 4.769070735090153e-06, "loss": 0.007, "step": 7553 }, { "epoch": 5.233113959127121, "grad_norm": 0.3235166668891907, "learning_rate": 4.768377253814147e-06, "loss": 0.0077, "step": 7554 }, { "epoch": 5.2338067197783165, "grad_norm": 0.3782225251197815, "learning_rate": 4.767683772538142e-06, "loss": 0.005, "step": 7555 }, { "epoch": 5.234499480429512, "grad_norm": 0.3849373459815979, "learning_rate": 4.766990291262136e-06, "loss": 0.0092, "step": 7556 }, { "epoch": 5.235192241080707, "grad_norm": 0.36994263529777527, "learning_rate": 4.76629680998613e-06, "loss": 0.007, "step": 7557 }, { "epoch": 5.235885001731901, "grad_norm": 0.3145955502986908, "learning_rate": 4.765603328710125e-06, "loss": 0.0066, "step": 7558 }, { "epoch": 5.236577762383097, "grad_norm": 0.2789524793624878, "learning_rate": 4.76490984743412e-06, "loss": 0.0066, "step": 7559 }, { "epoch": 5.237270523034292, "grad_norm": 0.509216845035553, "learning_rate": 4.764216366158114e-06, "loss": 0.0066, "step": 7560 }, { "epoch": 5.237963283685486, "grad_norm": 0.44652724266052246, "learning_rate": 4.763522884882108e-06, "loss": 0.0074, "step": 7561 }, { "epoch": 5.238656044336682, "grad_norm": 0.3717997074127197, "learning_rate": 4.762829403606103e-06, "loss": 0.008, "step": 7562 }, { "epoch": 5.239348804987877, "grad_norm": 0.30628785490989685, "learning_rate": 4.762135922330097e-06, "loss": 0.0068, "step": 7563 }, { "epoch": 5.240041565639071, "grad_norm": 0.27223464846611023, "learning_rate": 4.761442441054092e-06, "loss": 0.0059, "step": 7564 }, { "epoch": 5.240734326290267, "grad_norm": 0.2744392454624176, "learning_rate": 4.760748959778086e-06, "loss": 0.0058, "step": 7565 }, { "epoch": 5.241427086941462, "grad_norm": 0.3380274474620819, "learning_rate": 4.7600554785020805e-06, "loss": 0.0074, "step": 7566 }, { "epoch": 5.242119847592657, "grad_norm": 0.41907307505607605, "learning_rate": 4.7593619972260754e-06, "loss": 0.0086, "step": 7567 }, { "epoch": 5.2428126082438515, "grad_norm": 0.2549433410167694, "learning_rate": 4.7586685159500695e-06, "loss": 0.0054, "step": 7568 }, { "epoch": 5.243505368895047, "grad_norm": 0.5552940368652344, "learning_rate": 4.7579750346740645e-06, "loss": 0.0075, "step": 7569 }, { "epoch": 5.244198129546242, "grad_norm": 0.4340652525424957, "learning_rate": 4.7572815533980585e-06, "loss": 0.0109, "step": 7570 }, { "epoch": 5.2448908901974365, "grad_norm": 0.3300117254257202, "learning_rate": 4.7565880721220535e-06, "loss": 0.0074, "step": 7571 }, { "epoch": 5.245583650848632, "grad_norm": 0.3856252431869507, "learning_rate": 4.7558945908460476e-06, "loss": 0.007, "step": 7572 }, { "epoch": 5.246276411499827, "grad_norm": 0.5354393720626831, "learning_rate": 4.755201109570042e-06, "loss": 0.0061, "step": 7573 }, { "epoch": 5.246969172151021, "grad_norm": 0.43714696168899536, "learning_rate": 4.754507628294037e-06, "loss": 0.0089, "step": 7574 }, { "epoch": 5.247661932802217, "grad_norm": 0.2474542111158371, "learning_rate": 4.753814147018031e-06, "loss": 0.0056, "step": 7575 }, { "epoch": 5.248354693453412, "grad_norm": 0.31160688400268555, "learning_rate": 4.753120665742026e-06, "loss": 0.0069, "step": 7576 }, { "epoch": 5.249047454104607, "grad_norm": 0.3125763237476349, "learning_rate": 4.75242718446602e-06, "loss": 0.0075, "step": 7577 }, { "epoch": 5.249740214755802, "grad_norm": 0.354356586933136, "learning_rate": 4.751733703190014e-06, "loss": 0.0088, "step": 7578 }, { "epoch": 5.250432975406997, "grad_norm": 0.26645520329475403, "learning_rate": 4.751040221914009e-06, "loss": 0.0063, "step": 7579 }, { "epoch": 5.251125736058192, "grad_norm": 0.4143542945384979, "learning_rate": 4.750346740638004e-06, "loss": 0.0088, "step": 7580 }, { "epoch": 5.251818496709387, "grad_norm": 0.2791270315647125, "learning_rate": 4.749653259361998e-06, "loss": 0.006, "step": 7581 }, { "epoch": 5.252511257360582, "grad_norm": 0.32284653186798096, "learning_rate": 4.748959778085992e-06, "loss": 0.0079, "step": 7582 }, { "epoch": 5.253204018011777, "grad_norm": 0.3721042275428772, "learning_rate": 4.748266296809986e-06, "loss": 0.0085, "step": 7583 }, { "epoch": 5.253896778662972, "grad_norm": 0.27737486362457275, "learning_rate": 4.747572815533981e-06, "loss": 0.0056, "step": 7584 }, { "epoch": 5.254589539314167, "grad_norm": 0.3773183226585388, "learning_rate": 4.746879334257976e-06, "loss": 0.0112, "step": 7585 }, { "epoch": 5.255282299965362, "grad_norm": 0.6213601231575012, "learning_rate": 4.74618585298197e-06, "loss": 0.0089, "step": 7586 }, { "epoch": 5.255975060616557, "grad_norm": 0.37571918964385986, "learning_rate": 4.745492371705964e-06, "loss": 0.008, "step": 7587 }, { "epoch": 5.256667821267752, "grad_norm": 0.39039427042007446, "learning_rate": 4.744798890429959e-06, "loss": 0.0074, "step": 7588 }, { "epoch": 5.257360581918947, "grad_norm": 0.3099873661994934, "learning_rate": 4.744105409153954e-06, "loss": 0.0068, "step": 7589 }, { "epoch": 5.258053342570142, "grad_norm": 0.41337692737579346, "learning_rate": 4.743411927877948e-06, "loss": 0.0077, "step": 7590 }, { "epoch": 5.258746103221337, "grad_norm": 0.38124918937683105, "learning_rate": 4.742718446601942e-06, "loss": 0.0063, "step": 7591 }, { "epoch": 5.259438863872532, "grad_norm": 0.295126736164093, "learning_rate": 4.742024965325936e-06, "loss": 0.008, "step": 7592 }, { "epoch": 5.260131624523727, "grad_norm": 0.27431103587150574, "learning_rate": 4.741331484049931e-06, "loss": 0.0054, "step": 7593 }, { "epoch": 5.260824385174922, "grad_norm": 0.28997060656547546, "learning_rate": 4.740638002773926e-06, "loss": 0.005, "step": 7594 }, { "epoch": 5.261517145826117, "grad_norm": 0.37890487909317017, "learning_rate": 4.73994452149792e-06, "loss": 0.0051, "step": 7595 }, { "epoch": 5.262209906477312, "grad_norm": 0.44293665885925293, "learning_rate": 4.739251040221914e-06, "loss": 0.0104, "step": 7596 }, { "epoch": 5.2629026671285075, "grad_norm": 0.24331487715244293, "learning_rate": 4.738557558945909e-06, "loss": 0.0062, "step": 7597 }, { "epoch": 5.263595427779702, "grad_norm": 0.4369862675666809, "learning_rate": 4.737864077669903e-06, "loss": 0.0062, "step": 7598 }, { "epoch": 5.264288188430897, "grad_norm": 0.5328646302223206, "learning_rate": 4.737170596393898e-06, "loss": 0.0089, "step": 7599 }, { "epoch": 5.2649809490820925, "grad_norm": 0.4848049283027649, "learning_rate": 4.736477115117892e-06, "loss": 0.0085, "step": 7600 }, { "epoch": 5.265673709733287, "grad_norm": 0.5734719634056091, "learning_rate": 4.735783633841887e-06, "loss": 0.0088, "step": 7601 }, { "epoch": 5.266366470384482, "grad_norm": 0.3014770448207855, "learning_rate": 4.735090152565881e-06, "loss": 0.005, "step": 7602 }, { "epoch": 5.267059231035677, "grad_norm": 0.4135104715824127, "learning_rate": 4.734396671289875e-06, "loss": 0.006, "step": 7603 }, { "epoch": 5.267751991686872, "grad_norm": 0.40195783972740173, "learning_rate": 4.73370319001387e-06, "loss": 0.007, "step": 7604 }, { "epoch": 5.268444752338067, "grad_norm": 0.25867411494255066, "learning_rate": 4.733009708737864e-06, "loss": 0.0065, "step": 7605 }, { "epoch": 5.269137512989262, "grad_norm": 0.4152539372444153, "learning_rate": 4.732316227461859e-06, "loss": 0.0096, "step": 7606 }, { "epoch": 5.269830273640457, "grad_norm": 0.38939064741134644, "learning_rate": 4.731622746185853e-06, "loss": 0.0075, "step": 7607 }, { "epoch": 5.270523034291652, "grad_norm": 0.317944198846817, "learning_rate": 4.730929264909847e-06, "loss": 0.007, "step": 7608 }, { "epoch": 5.271215794942847, "grad_norm": 0.29861703515052795, "learning_rate": 4.730235783633842e-06, "loss": 0.0062, "step": 7609 }, { "epoch": 5.271908555594043, "grad_norm": 0.2839019000530243, "learning_rate": 4.729542302357837e-06, "loss": 0.0068, "step": 7610 }, { "epoch": 5.272601316245237, "grad_norm": 0.3510481119155884, "learning_rate": 4.728848821081831e-06, "loss": 0.0072, "step": 7611 }, { "epoch": 5.273294076896432, "grad_norm": 0.3694562017917633, "learning_rate": 4.728155339805825e-06, "loss": 0.0051, "step": 7612 }, { "epoch": 5.2739868375476275, "grad_norm": 0.392853707075119, "learning_rate": 4.7274618585298195e-06, "loss": 0.0119, "step": 7613 }, { "epoch": 5.274679598198822, "grad_norm": 0.32790088653564453, "learning_rate": 4.7267683772538144e-06, "loss": 0.007, "step": 7614 }, { "epoch": 5.275372358850017, "grad_norm": 0.39944061636924744, "learning_rate": 4.726074895977809e-06, "loss": 0.0092, "step": 7615 }, { "epoch": 5.2760651195012125, "grad_norm": 0.31208524107933044, "learning_rate": 4.7253814147018035e-06, "loss": 0.0058, "step": 7616 }, { "epoch": 5.276757880152408, "grad_norm": 0.30092665553092957, "learning_rate": 4.7246879334257975e-06, "loss": 0.0082, "step": 7617 }, { "epoch": 5.277450640803602, "grad_norm": 0.3765978217124939, "learning_rate": 4.7239944521497925e-06, "loss": 0.0073, "step": 7618 }, { "epoch": 5.278143401454797, "grad_norm": 0.2820228338241577, "learning_rate": 4.723300970873787e-06, "loss": 0.0071, "step": 7619 }, { "epoch": 5.278836162105993, "grad_norm": 0.26217740774154663, "learning_rate": 4.7226074895977815e-06, "loss": 0.0054, "step": 7620 }, { "epoch": 5.279528922757187, "grad_norm": 0.5804328918457031, "learning_rate": 4.721914008321776e-06, "loss": 0.0094, "step": 7621 }, { "epoch": 5.280221683408382, "grad_norm": 0.32458844780921936, "learning_rate": 4.72122052704577e-06, "loss": 0.007, "step": 7622 }, { "epoch": 5.280914444059578, "grad_norm": 0.3897286057472229, "learning_rate": 4.720527045769765e-06, "loss": 0.0089, "step": 7623 }, { "epoch": 5.281607204710772, "grad_norm": 0.4801514744758606, "learning_rate": 4.7198335644937595e-06, "loss": 0.0129, "step": 7624 }, { "epoch": 5.282299965361967, "grad_norm": 0.31313589215278625, "learning_rate": 4.719140083217754e-06, "loss": 0.0083, "step": 7625 }, { "epoch": 5.282992726013163, "grad_norm": 0.4712836444377899, "learning_rate": 4.718446601941748e-06, "loss": 0.0099, "step": 7626 }, { "epoch": 5.283685486664357, "grad_norm": 0.30827465653419495, "learning_rate": 4.717753120665743e-06, "loss": 0.0065, "step": 7627 }, { "epoch": 5.284378247315552, "grad_norm": 0.2368597537279129, "learning_rate": 4.717059639389737e-06, "loss": 0.0052, "step": 7628 }, { "epoch": 5.285071007966748, "grad_norm": 0.3655679523944855, "learning_rate": 4.716366158113732e-06, "loss": 0.0064, "step": 7629 }, { "epoch": 5.285763768617943, "grad_norm": 0.3163033127784729, "learning_rate": 4.715672676837726e-06, "loss": 0.0066, "step": 7630 }, { "epoch": 5.286456529269137, "grad_norm": 0.32988113164901733, "learning_rate": 4.71497919556172e-06, "loss": 0.009, "step": 7631 }, { "epoch": 5.2871492899203325, "grad_norm": 0.4472014904022217, "learning_rate": 4.714285714285715e-06, "loss": 0.0087, "step": 7632 }, { "epoch": 5.287842050571528, "grad_norm": 0.2753150463104248, "learning_rate": 4.713592233009709e-06, "loss": 0.005, "step": 7633 }, { "epoch": 5.288534811222722, "grad_norm": 0.4464603364467621, "learning_rate": 4.712898751733704e-06, "loss": 0.0088, "step": 7634 }, { "epoch": 5.2892275718739175, "grad_norm": 0.18909068405628204, "learning_rate": 4.712205270457698e-06, "loss": 0.0045, "step": 7635 }, { "epoch": 5.289920332525113, "grad_norm": 0.4636782705783844, "learning_rate": 4.711511789181693e-06, "loss": 0.0164, "step": 7636 }, { "epoch": 5.290613093176308, "grad_norm": 0.3970611095428467, "learning_rate": 4.710818307905687e-06, "loss": 0.0069, "step": 7637 }, { "epoch": 5.291305853827502, "grad_norm": 0.3895193934440613, "learning_rate": 4.710124826629681e-06, "loss": 0.0075, "step": 7638 }, { "epoch": 5.291998614478698, "grad_norm": 0.3456301689147949, "learning_rate": 4.709431345353676e-06, "loss": 0.0091, "step": 7639 }, { "epoch": 5.292691375129893, "grad_norm": 0.47521817684173584, "learning_rate": 4.70873786407767e-06, "loss": 0.0098, "step": 7640 }, { "epoch": 5.293384135781087, "grad_norm": 0.3610929846763611, "learning_rate": 4.708044382801665e-06, "loss": 0.0085, "step": 7641 }, { "epoch": 5.294076896432283, "grad_norm": 0.739068329334259, "learning_rate": 4.707350901525659e-06, "loss": 0.0069, "step": 7642 }, { "epoch": 5.294769657083478, "grad_norm": 0.3092059791088104, "learning_rate": 4.706657420249653e-06, "loss": 0.0062, "step": 7643 }, { "epoch": 5.295462417734672, "grad_norm": 0.3299868106842041, "learning_rate": 4.705963938973648e-06, "loss": 0.0072, "step": 7644 }, { "epoch": 5.296155178385868, "grad_norm": 1.6031320095062256, "learning_rate": 4.705270457697643e-06, "loss": 0.0109, "step": 7645 }, { "epoch": 5.296847939037063, "grad_norm": 0.3581763803958893, "learning_rate": 4.704576976421637e-06, "loss": 0.0073, "step": 7646 }, { "epoch": 5.297540699688257, "grad_norm": 0.5154359936714172, "learning_rate": 4.703883495145631e-06, "loss": 0.0119, "step": 7647 }, { "epoch": 5.2982334603394525, "grad_norm": 0.27011722326278687, "learning_rate": 4.703190013869626e-06, "loss": 0.0083, "step": 7648 }, { "epoch": 5.298926220990648, "grad_norm": 0.4391592741012573, "learning_rate": 4.702496532593621e-06, "loss": 0.0112, "step": 7649 }, { "epoch": 5.299618981641843, "grad_norm": 0.3810083270072937, "learning_rate": 4.701803051317615e-06, "loss": 0.0063, "step": 7650 }, { "epoch": 5.3003117422930375, "grad_norm": 0.3371776044368744, "learning_rate": 4.701109570041609e-06, "loss": 0.0088, "step": 7651 }, { "epoch": 5.301004502944233, "grad_norm": 0.2894117534160614, "learning_rate": 4.700416088765603e-06, "loss": 0.0077, "step": 7652 }, { "epoch": 5.301697263595428, "grad_norm": 0.3736479878425598, "learning_rate": 4.699722607489598e-06, "loss": 0.0079, "step": 7653 }, { "epoch": 5.302390024246622, "grad_norm": 0.2794745862483978, "learning_rate": 4.699029126213593e-06, "loss": 0.0053, "step": 7654 }, { "epoch": 5.303082784897818, "grad_norm": 0.34775087237358093, "learning_rate": 4.698335644937587e-06, "loss": 0.0082, "step": 7655 }, { "epoch": 5.303775545549013, "grad_norm": 0.45007821917533875, "learning_rate": 4.697642163661581e-06, "loss": 0.0076, "step": 7656 }, { "epoch": 5.304468306200208, "grad_norm": 0.2986973524093628, "learning_rate": 4.696948682385576e-06, "loss": 0.0064, "step": 7657 }, { "epoch": 5.305161066851403, "grad_norm": 0.3737943768501282, "learning_rate": 4.69625520110957e-06, "loss": 0.0054, "step": 7658 }, { "epoch": 5.305853827502598, "grad_norm": 0.2908320128917694, "learning_rate": 4.695561719833565e-06, "loss": 0.006, "step": 7659 }, { "epoch": 5.306546588153793, "grad_norm": 0.5494328737258911, "learning_rate": 4.694868238557559e-06, "loss": 0.0076, "step": 7660 }, { "epoch": 5.307239348804988, "grad_norm": 0.22597812116146088, "learning_rate": 4.6941747572815534e-06, "loss": 0.0053, "step": 7661 }, { "epoch": 5.307932109456183, "grad_norm": 0.46040821075439453, "learning_rate": 4.693481276005548e-06, "loss": 0.0099, "step": 7662 }, { "epoch": 5.308624870107378, "grad_norm": 0.381632536649704, "learning_rate": 4.6927877947295425e-06, "loss": 0.0086, "step": 7663 }, { "epoch": 5.3093176307585725, "grad_norm": 0.283383309841156, "learning_rate": 4.692094313453537e-06, "loss": 0.006, "step": 7664 }, { "epoch": 5.310010391409768, "grad_norm": 0.3281424045562744, "learning_rate": 4.6914008321775315e-06, "loss": 0.0061, "step": 7665 }, { "epoch": 5.310703152060963, "grad_norm": 0.4245662987232208, "learning_rate": 4.690707350901526e-06, "loss": 0.0066, "step": 7666 }, { "epoch": 5.3113959127121575, "grad_norm": 0.3177310824394226, "learning_rate": 4.6900138696255205e-06, "loss": 0.0064, "step": 7667 }, { "epoch": 5.312088673363353, "grad_norm": 0.3352442681789398, "learning_rate": 4.689320388349515e-06, "loss": 0.0074, "step": 7668 }, { "epoch": 5.312781434014548, "grad_norm": 0.24328415095806122, "learning_rate": 4.6886269070735095e-06, "loss": 0.0051, "step": 7669 }, { "epoch": 5.313474194665743, "grad_norm": 0.32478755712509155, "learning_rate": 4.687933425797504e-06, "loss": 0.0083, "step": 7670 }, { "epoch": 5.314166955316938, "grad_norm": 0.2994299530982971, "learning_rate": 4.6872399445214985e-06, "loss": 0.0073, "step": 7671 }, { "epoch": 5.314859715968133, "grad_norm": 0.4358683228492737, "learning_rate": 4.686546463245493e-06, "loss": 0.0063, "step": 7672 }, { "epoch": 5.315552476619328, "grad_norm": 0.27254951000213623, "learning_rate": 4.685852981969487e-06, "loss": 0.0062, "step": 7673 }, { "epoch": 5.316245237270523, "grad_norm": 0.26246801018714905, "learning_rate": 4.685159500693482e-06, "loss": 0.005, "step": 7674 }, { "epoch": 5.316937997921718, "grad_norm": 0.3456709384918213, "learning_rate": 4.6844660194174766e-06, "loss": 0.0069, "step": 7675 }, { "epoch": 5.317630758572913, "grad_norm": 0.4331519901752472, "learning_rate": 4.683772538141471e-06, "loss": 0.0089, "step": 7676 }, { "epoch": 5.3183235192241085, "grad_norm": 0.27074652910232544, "learning_rate": 4.683079056865465e-06, "loss": 0.006, "step": 7677 }, { "epoch": 5.319016279875303, "grad_norm": 0.3390403985977173, "learning_rate": 4.68238557558946e-06, "loss": 0.0082, "step": 7678 }, { "epoch": 5.319709040526498, "grad_norm": 0.3624959886074066, "learning_rate": 4.681692094313454e-06, "loss": 0.0057, "step": 7679 }, { "epoch": 5.3204018011776935, "grad_norm": 0.28021004796028137, "learning_rate": 4.680998613037449e-06, "loss": 0.0051, "step": 7680 }, { "epoch": 5.321094561828888, "grad_norm": 0.5038958191871643, "learning_rate": 4.680305131761443e-06, "loss": 0.0065, "step": 7681 }, { "epoch": 5.321787322480083, "grad_norm": 0.31561705470085144, "learning_rate": 4.679611650485437e-06, "loss": 0.008, "step": 7682 }, { "epoch": 5.322480083131278, "grad_norm": 0.39957281947135925, "learning_rate": 4.678918169209432e-06, "loss": 0.0071, "step": 7683 }, { "epoch": 5.323172843782473, "grad_norm": 0.3455754816532135, "learning_rate": 4.678224687933427e-06, "loss": 0.0103, "step": 7684 }, { "epoch": 5.323865604433668, "grad_norm": 0.5441429615020752, "learning_rate": 4.677531206657421e-06, "loss": 0.0082, "step": 7685 }, { "epoch": 5.324558365084863, "grad_norm": 0.26968908309936523, "learning_rate": 4.676837725381415e-06, "loss": 0.0068, "step": 7686 }, { "epoch": 5.325251125736058, "grad_norm": 0.32514292001724243, "learning_rate": 4.67614424410541e-06, "loss": 0.01, "step": 7687 }, { "epoch": 5.325943886387253, "grad_norm": 0.3907332718372345, "learning_rate": 4.675450762829404e-06, "loss": 0.0095, "step": 7688 }, { "epoch": 5.326636647038448, "grad_norm": 0.4088253974914551, "learning_rate": 4.674757281553399e-06, "loss": 0.0094, "step": 7689 }, { "epoch": 5.327329407689644, "grad_norm": 0.3880894184112549, "learning_rate": 4.674063800277393e-06, "loss": 0.006, "step": 7690 }, { "epoch": 5.328022168340838, "grad_norm": 0.266454815864563, "learning_rate": 4.673370319001387e-06, "loss": 0.0075, "step": 7691 }, { "epoch": 5.328714928992033, "grad_norm": 0.4018932580947876, "learning_rate": 4.672676837725382e-06, "loss": 0.0087, "step": 7692 }, { "epoch": 5.3294076896432285, "grad_norm": 0.42245224118232727, "learning_rate": 4.671983356449376e-06, "loss": 0.0077, "step": 7693 }, { "epoch": 5.330100450294423, "grad_norm": 0.2805998623371124, "learning_rate": 4.671289875173371e-06, "loss": 0.0063, "step": 7694 }, { "epoch": 5.330793210945618, "grad_norm": 0.37564656138420105, "learning_rate": 4.670596393897365e-06, "loss": 0.0095, "step": 7695 }, { "epoch": 5.3314859715968135, "grad_norm": 0.29174837470054626, "learning_rate": 4.66990291262136e-06, "loss": 0.0099, "step": 7696 }, { "epoch": 5.332178732248009, "grad_norm": 0.47423186898231506, "learning_rate": 4.669209431345354e-06, "loss": 0.0079, "step": 7697 }, { "epoch": 5.332871492899203, "grad_norm": 0.3046198785305023, "learning_rate": 4.668515950069348e-06, "loss": 0.0065, "step": 7698 }, { "epoch": 5.333564253550398, "grad_norm": 0.3165915608406067, "learning_rate": 4.667822468793343e-06, "loss": 0.0066, "step": 7699 }, { "epoch": 5.334257014201594, "grad_norm": 0.29851290583610535, "learning_rate": 4.667128987517337e-06, "loss": 0.0062, "step": 7700 }, { "epoch": 5.334949774852788, "grad_norm": 0.22100408375263214, "learning_rate": 4.666435506241332e-06, "loss": 0.006, "step": 7701 }, { "epoch": 5.335642535503983, "grad_norm": 0.30884596705436707, "learning_rate": 4.665742024965326e-06, "loss": 0.0052, "step": 7702 }, { "epoch": 5.336335296155179, "grad_norm": 0.2811336815357208, "learning_rate": 4.66504854368932e-06, "loss": 0.0071, "step": 7703 }, { "epoch": 5.337028056806373, "grad_norm": 0.29546523094177246, "learning_rate": 4.664355062413315e-06, "loss": 0.0066, "step": 7704 }, { "epoch": 5.337720817457568, "grad_norm": 0.4193173348903656, "learning_rate": 4.66366158113731e-06, "loss": 0.0061, "step": 7705 }, { "epoch": 5.338413578108764, "grad_norm": 0.2141648828983307, "learning_rate": 4.662968099861304e-06, "loss": 0.0045, "step": 7706 }, { "epoch": 5.339106338759958, "grad_norm": 0.30084890127182007, "learning_rate": 4.662274618585298e-06, "loss": 0.0071, "step": 7707 }, { "epoch": 5.339799099411153, "grad_norm": 0.46289554238319397, "learning_rate": 4.6615811373092924e-06, "loss": 0.0058, "step": 7708 }, { "epoch": 5.3404918600623486, "grad_norm": 0.34725630283355713, "learning_rate": 4.660887656033287e-06, "loss": 0.0063, "step": 7709 }, { "epoch": 5.341184620713544, "grad_norm": 0.37042534351348877, "learning_rate": 4.660194174757282e-06, "loss": 0.0097, "step": 7710 }, { "epoch": 5.341877381364738, "grad_norm": 0.30515289306640625, "learning_rate": 4.659500693481276e-06, "loss": 0.0066, "step": 7711 }, { "epoch": 5.3425701420159335, "grad_norm": 0.5584580302238464, "learning_rate": 4.6588072122052705e-06, "loss": 0.0123, "step": 7712 }, { "epoch": 5.343262902667129, "grad_norm": 0.42978665232658386, "learning_rate": 4.658113730929265e-06, "loss": 0.0088, "step": 7713 }, { "epoch": 5.343955663318323, "grad_norm": 0.5490087270736694, "learning_rate": 4.65742024965326e-06, "loss": 0.0061, "step": 7714 }, { "epoch": 5.3446484239695184, "grad_norm": 0.7571777105331421, "learning_rate": 4.6567267683772544e-06, "loss": 0.0154, "step": 7715 }, { "epoch": 5.345341184620714, "grad_norm": 0.8619104623794556, "learning_rate": 4.6560332871012485e-06, "loss": 0.0091, "step": 7716 }, { "epoch": 5.346033945271909, "grad_norm": 0.3772400915622711, "learning_rate": 4.6553398058252435e-06, "loss": 0.0082, "step": 7717 }, { "epoch": 5.346726705923103, "grad_norm": 0.2627728581428528, "learning_rate": 4.6546463245492375e-06, "loss": 0.0063, "step": 7718 }, { "epoch": 5.347419466574299, "grad_norm": 0.262724906206131, "learning_rate": 4.6539528432732325e-06, "loss": 0.0058, "step": 7719 }, { "epoch": 5.348112227225494, "grad_norm": 0.23938456177711487, "learning_rate": 4.6532593619972266e-06, "loss": 0.0055, "step": 7720 }, { "epoch": 5.348804987876688, "grad_norm": 0.3758114278316498, "learning_rate": 4.652565880721221e-06, "loss": 0.0081, "step": 7721 }, { "epoch": 5.349497748527884, "grad_norm": 0.45761778950691223, "learning_rate": 4.6518723994452156e-06, "loss": 0.0113, "step": 7722 }, { "epoch": 5.350190509179079, "grad_norm": 0.30680564045906067, "learning_rate": 4.65117891816921e-06, "loss": 0.0083, "step": 7723 }, { "epoch": 5.350883269830273, "grad_norm": 0.42523193359375, "learning_rate": 4.650485436893205e-06, "loss": 0.0081, "step": 7724 }, { "epoch": 5.351576030481469, "grad_norm": 0.4331026077270508, "learning_rate": 4.649791955617199e-06, "loss": 0.0112, "step": 7725 }, { "epoch": 5.352268791132664, "grad_norm": 0.3689337372779846, "learning_rate": 4.649098474341194e-06, "loss": 0.0057, "step": 7726 }, { "epoch": 5.352961551783858, "grad_norm": 0.5229584574699402, "learning_rate": 4.648404993065188e-06, "loss": 0.0087, "step": 7727 }, { "epoch": 5.3536543124350535, "grad_norm": 0.7035202383995056, "learning_rate": 4.647711511789182e-06, "loss": 0.0119, "step": 7728 }, { "epoch": 5.354347073086249, "grad_norm": 0.3946300148963928, "learning_rate": 4.647018030513177e-06, "loss": 0.0063, "step": 7729 }, { "epoch": 5.355039833737444, "grad_norm": 0.49915385246276855, "learning_rate": 4.646324549237171e-06, "loss": 0.0104, "step": 7730 }, { "epoch": 5.3557325943886385, "grad_norm": 0.3928295373916626, "learning_rate": 4.645631067961166e-06, "loss": 0.0085, "step": 7731 }, { "epoch": 5.356425355039834, "grad_norm": 0.30644491314888, "learning_rate": 4.64493758668516e-06, "loss": 0.0068, "step": 7732 }, { "epoch": 5.357118115691029, "grad_norm": 0.31353700160980225, "learning_rate": 4.644244105409154e-06, "loss": 0.0075, "step": 7733 }, { "epoch": 5.357810876342223, "grad_norm": 0.43064355850219727, "learning_rate": 4.643550624133149e-06, "loss": 0.0088, "step": 7734 }, { "epoch": 5.358503636993419, "grad_norm": 0.6127991080284119, "learning_rate": 4.642857142857144e-06, "loss": 0.0092, "step": 7735 }, { "epoch": 5.359196397644614, "grad_norm": 0.39949271082878113, "learning_rate": 4.642163661581138e-06, "loss": 0.0073, "step": 7736 }, { "epoch": 5.359889158295809, "grad_norm": 0.35690221190452576, "learning_rate": 4.641470180305132e-06, "loss": 0.0107, "step": 7737 }, { "epoch": 5.360581918947004, "grad_norm": 0.34157228469848633, "learning_rate": 4.640776699029126e-06, "loss": 0.009, "step": 7738 }, { "epoch": 5.361274679598199, "grad_norm": 0.3978438675403595, "learning_rate": 4.640083217753121e-06, "loss": 0.0081, "step": 7739 }, { "epoch": 5.361967440249394, "grad_norm": 0.468851238489151, "learning_rate": 4.639389736477116e-06, "loss": 0.0127, "step": 7740 }, { "epoch": 5.362660200900589, "grad_norm": 0.297496497631073, "learning_rate": 4.63869625520111e-06, "loss": 0.0086, "step": 7741 }, { "epoch": 5.363352961551784, "grad_norm": 0.32384419441223145, "learning_rate": 4.638002773925104e-06, "loss": 0.0057, "step": 7742 }, { "epoch": 5.364045722202979, "grad_norm": 0.2854473888874054, "learning_rate": 4.637309292649099e-06, "loss": 0.0052, "step": 7743 }, { "epoch": 5.3647384828541735, "grad_norm": 0.5612171292304993, "learning_rate": 4.636615811373094e-06, "loss": 0.0086, "step": 7744 }, { "epoch": 5.365431243505369, "grad_norm": 0.329746276140213, "learning_rate": 4.635922330097088e-06, "loss": 0.0089, "step": 7745 }, { "epoch": 5.366124004156564, "grad_norm": 0.43329378962516785, "learning_rate": 4.635228848821082e-06, "loss": 0.0099, "step": 7746 }, { "epoch": 5.3668167648077585, "grad_norm": 0.3019446134567261, "learning_rate": 4.634535367545076e-06, "loss": 0.0081, "step": 7747 }, { "epoch": 5.367509525458954, "grad_norm": 0.30500757694244385, "learning_rate": 4.633841886269071e-06, "loss": 0.0068, "step": 7748 }, { "epoch": 5.368202286110149, "grad_norm": 0.3315373361110687, "learning_rate": 4.633148404993066e-06, "loss": 0.0058, "step": 7749 }, { "epoch": 5.368895046761344, "grad_norm": 0.42689260840415955, "learning_rate": 4.63245492371706e-06, "loss": 0.0082, "step": 7750 }, { "epoch": 5.369587807412539, "grad_norm": 0.4819112718105316, "learning_rate": 4.631761442441054e-06, "loss": 0.0069, "step": 7751 }, { "epoch": 5.370280568063734, "grad_norm": 0.4219898581504822, "learning_rate": 4.631067961165049e-06, "loss": 0.0072, "step": 7752 }, { "epoch": 5.370973328714929, "grad_norm": 0.48407548666000366, "learning_rate": 4.630374479889043e-06, "loss": 0.0078, "step": 7753 }, { "epoch": 5.371666089366124, "grad_norm": 0.33480745553970337, "learning_rate": 4.629680998613038e-06, "loss": 0.0059, "step": 7754 }, { "epoch": 5.372358850017319, "grad_norm": 0.3247593939304352, "learning_rate": 4.628987517337032e-06, "loss": 0.0068, "step": 7755 }, { "epoch": 5.373051610668514, "grad_norm": 0.38313329219818115, "learning_rate": 4.628294036061026e-06, "loss": 0.0116, "step": 7756 }, { "epoch": 5.3737443713197095, "grad_norm": 0.4368196129798889, "learning_rate": 4.627600554785021e-06, "loss": 0.0074, "step": 7757 }, { "epoch": 5.374437131970904, "grad_norm": 0.30036941170692444, "learning_rate": 4.626907073509015e-06, "loss": 0.0067, "step": 7758 }, { "epoch": 5.375129892622099, "grad_norm": 0.2873584032058716, "learning_rate": 4.62621359223301e-06, "loss": 0.0063, "step": 7759 }, { "epoch": 5.3758226532732944, "grad_norm": 0.40364065766334534, "learning_rate": 4.625520110957004e-06, "loss": 0.0082, "step": 7760 }, { "epoch": 5.376515413924489, "grad_norm": 0.4257599413394928, "learning_rate": 4.624826629680999e-06, "loss": 0.0084, "step": 7761 }, { "epoch": 5.377208174575684, "grad_norm": 0.3269195258617401, "learning_rate": 4.6241331484049934e-06, "loss": 0.0068, "step": 7762 }, { "epoch": 5.377900935226879, "grad_norm": 0.40550678968429565, "learning_rate": 4.6234396671289875e-06, "loss": 0.0077, "step": 7763 }, { "epoch": 5.378593695878074, "grad_norm": 0.4186185896396637, "learning_rate": 4.6227461858529825e-06, "loss": 0.0104, "step": 7764 }, { "epoch": 5.379286456529269, "grad_norm": 0.45377662777900696, "learning_rate": 4.622052704576977e-06, "loss": 0.0093, "step": 7765 }, { "epoch": 5.379979217180464, "grad_norm": 0.4113067090511322, "learning_rate": 4.6213592233009715e-06, "loss": 0.0079, "step": 7766 }, { "epoch": 5.380671977831659, "grad_norm": 0.29760265350341797, "learning_rate": 4.6206657420249656e-06, "loss": 0.0056, "step": 7767 }, { "epoch": 5.381364738482854, "grad_norm": 0.2859029769897461, "learning_rate": 4.61997226074896e-06, "loss": 0.0092, "step": 7768 }, { "epoch": 5.382057499134049, "grad_norm": 0.40182769298553467, "learning_rate": 4.619278779472955e-06, "loss": 0.008, "step": 7769 }, { "epoch": 5.382750259785245, "grad_norm": 0.3549944758415222, "learning_rate": 4.6185852981969495e-06, "loss": 0.007, "step": 7770 }, { "epoch": 5.383443020436439, "grad_norm": 0.30594438314437866, "learning_rate": 4.617891816920944e-06, "loss": 0.0064, "step": 7771 }, { "epoch": 5.384135781087634, "grad_norm": 0.3039894104003906, "learning_rate": 4.617198335644938e-06, "loss": 0.0044, "step": 7772 }, { "epoch": 5.3848285417388295, "grad_norm": 0.256325900554657, "learning_rate": 4.616504854368933e-06, "loss": 0.0064, "step": 7773 }, { "epoch": 5.385521302390024, "grad_norm": 0.3893841803073883, "learning_rate": 4.615811373092927e-06, "loss": 0.0061, "step": 7774 }, { "epoch": 5.386214063041219, "grad_norm": 0.5432993769645691, "learning_rate": 4.615117891816922e-06, "loss": 0.011, "step": 7775 }, { "epoch": 5.3869068236924145, "grad_norm": 0.30605578422546387, "learning_rate": 4.614424410540916e-06, "loss": 0.0062, "step": 7776 }, { "epoch": 5.38759958434361, "grad_norm": 0.3061191439628601, "learning_rate": 4.61373092926491e-06, "loss": 0.0072, "step": 7777 }, { "epoch": 5.388292344994804, "grad_norm": 0.597377598285675, "learning_rate": 4.613037447988905e-06, "loss": 0.0133, "step": 7778 }, { "epoch": 5.388985105645999, "grad_norm": 0.5816665887832642, "learning_rate": 4.612343966712899e-06, "loss": 0.0083, "step": 7779 }, { "epoch": 5.389677866297195, "grad_norm": 0.4072982370853424, "learning_rate": 4.611650485436894e-06, "loss": 0.0104, "step": 7780 }, { "epoch": 5.390370626948389, "grad_norm": 0.3289441168308258, "learning_rate": 4.610957004160888e-06, "loss": 0.0064, "step": 7781 }, { "epoch": 5.391063387599584, "grad_norm": 0.7042639851570129, "learning_rate": 4.610263522884883e-06, "loss": 0.0097, "step": 7782 }, { "epoch": 5.39175614825078, "grad_norm": 0.3567148447036743, "learning_rate": 4.609570041608877e-06, "loss": 0.0125, "step": 7783 }, { "epoch": 5.392448908901974, "grad_norm": 0.27430447936058044, "learning_rate": 4.608876560332871e-06, "loss": 0.0056, "step": 7784 }, { "epoch": 5.393141669553169, "grad_norm": 0.9605273008346558, "learning_rate": 4.608183079056866e-06, "loss": 0.0092, "step": 7785 }, { "epoch": 5.393834430204365, "grad_norm": 0.36006471514701843, "learning_rate": 4.60748959778086e-06, "loss": 0.0094, "step": 7786 }, { "epoch": 5.394527190855559, "grad_norm": 0.3300853967666626, "learning_rate": 4.606796116504855e-06, "loss": 0.0092, "step": 7787 }, { "epoch": 5.395219951506754, "grad_norm": 0.36270296573638916, "learning_rate": 4.606102635228849e-06, "loss": 0.0068, "step": 7788 }, { "epoch": 5.3959127121579495, "grad_norm": 0.3791219890117645, "learning_rate": 4.605409153952843e-06, "loss": 0.0064, "step": 7789 }, { "epoch": 5.396605472809145, "grad_norm": 0.26875039935112, "learning_rate": 4.604715672676838e-06, "loss": 0.0065, "step": 7790 }, { "epoch": 5.397298233460339, "grad_norm": 0.33566567301750183, "learning_rate": 4.604022191400833e-06, "loss": 0.0054, "step": 7791 }, { "epoch": 5.3979909941115345, "grad_norm": 0.39481306076049805, "learning_rate": 4.603328710124827e-06, "loss": 0.0085, "step": 7792 }, { "epoch": 5.39868375476273, "grad_norm": 0.29390987753868103, "learning_rate": 4.602635228848821e-06, "loss": 0.0089, "step": 7793 }, { "epoch": 5.399376515413924, "grad_norm": 0.3850598931312561, "learning_rate": 4.601941747572816e-06, "loss": 0.0088, "step": 7794 }, { "epoch": 5.400069276065119, "grad_norm": 0.3381340503692627, "learning_rate": 4.60124826629681e-06, "loss": 0.0106, "step": 7795 }, { "epoch": 5.400762036716315, "grad_norm": 0.3669929802417755, "learning_rate": 4.600554785020805e-06, "loss": 0.0091, "step": 7796 }, { "epoch": 5.40145479736751, "grad_norm": 0.35967838764190674, "learning_rate": 4.599861303744799e-06, "loss": 0.0067, "step": 7797 }, { "epoch": 5.402147558018704, "grad_norm": 0.3908219039440155, "learning_rate": 4.599167822468793e-06, "loss": 0.0059, "step": 7798 }, { "epoch": 5.4028403186699, "grad_norm": 0.3632335364818573, "learning_rate": 4.598474341192788e-06, "loss": 0.007, "step": 7799 }, { "epoch": 5.403533079321095, "grad_norm": 0.3082124590873718, "learning_rate": 4.597780859916783e-06, "loss": 0.0085, "step": 7800 }, { "epoch": 5.404225839972289, "grad_norm": 0.338107168674469, "learning_rate": 4.597087378640777e-06, "loss": 0.0089, "step": 7801 }, { "epoch": 5.404918600623485, "grad_norm": 0.3868095874786377, "learning_rate": 4.596393897364771e-06, "loss": 0.0098, "step": 7802 }, { "epoch": 5.40561136127468, "grad_norm": 0.341936856508255, "learning_rate": 4.595700416088766e-06, "loss": 0.0078, "step": 7803 }, { "epoch": 5.406304121925874, "grad_norm": 0.3599020540714264, "learning_rate": 4.59500693481276e-06, "loss": 0.006, "step": 7804 }, { "epoch": 5.40699688257707, "grad_norm": 0.34239375591278076, "learning_rate": 4.594313453536755e-06, "loss": 0.0079, "step": 7805 }, { "epoch": 5.407689643228265, "grad_norm": 0.3811340034008026, "learning_rate": 4.593619972260749e-06, "loss": 0.0101, "step": 7806 }, { "epoch": 5.408382403879459, "grad_norm": 0.3789340555667877, "learning_rate": 4.592926490984743e-06, "loss": 0.0095, "step": 7807 }, { "epoch": 5.4090751645306545, "grad_norm": 0.6665703654289246, "learning_rate": 4.592233009708738e-06, "loss": 0.0082, "step": 7808 }, { "epoch": 5.40976792518185, "grad_norm": 0.41004064679145813, "learning_rate": 4.5915395284327324e-06, "loss": 0.0092, "step": 7809 }, { "epoch": 5.410460685833045, "grad_norm": 0.5761812329292297, "learning_rate": 4.590846047156727e-06, "loss": 0.0107, "step": 7810 }, { "epoch": 5.4111534464842395, "grad_norm": 0.4545287489891052, "learning_rate": 4.5901525658807215e-06, "loss": 0.0092, "step": 7811 }, { "epoch": 5.411846207135435, "grad_norm": 0.3607684075832367, "learning_rate": 4.589459084604716e-06, "loss": 0.008, "step": 7812 }, { "epoch": 5.41253896778663, "grad_norm": 0.24616214632987976, "learning_rate": 4.5887656033287105e-06, "loss": 0.0046, "step": 7813 }, { "epoch": 5.413231728437824, "grad_norm": 0.2663242816925049, "learning_rate": 4.5880721220527046e-06, "loss": 0.0062, "step": 7814 }, { "epoch": 5.41392448908902, "grad_norm": 0.3745112717151642, "learning_rate": 4.5873786407766995e-06, "loss": 0.0085, "step": 7815 }, { "epoch": 5.414617249740215, "grad_norm": 0.35963526368141174, "learning_rate": 4.586685159500694e-06, "loss": 0.0072, "step": 7816 }, { "epoch": 5.415310010391409, "grad_norm": 0.5829387307167053, "learning_rate": 4.5859916782246885e-06, "loss": 0.0087, "step": 7817 }, { "epoch": 5.416002771042605, "grad_norm": 0.5039746165275574, "learning_rate": 4.585298196948683e-06, "loss": 0.0092, "step": 7818 }, { "epoch": 5.4166955316938, "grad_norm": 0.42750078439712524, "learning_rate": 4.584604715672677e-06, "loss": 0.012, "step": 7819 }, { "epoch": 5.417388292344995, "grad_norm": 0.4325467050075531, "learning_rate": 4.583911234396672e-06, "loss": 0.0081, "step": 7820 }, { "epoch": 5.41808105299619, "grad_norm": 0.3611724078655243, "learning_rate": 4.5832177531206666e-06, "loss": 0.009, "step": 7821 }, { "epoch": 5.418773813647385, "grad_norm": 0.4679085910320282, "learning_rate": 4.582524271844661e-06, "loss": 0.0086, "step": 7822 }, { "epoch": 5.41946657429858, "grad_norm": 0.4069465100765228, "learning_rate": 4.581830790568655e-06, "loss": 0.0065, "step": 7823 }, { "epoch": 5.4201593349497745, "grad_norm": 0.34985414147377014, "learning_rate": 4.581137309292649e-06, "loss": 0.0082, "step": 7824 }, { "epoch": 5.42085209560097, "grad_norm": 0.3227502703666687, "learning_rate": 4.580443828016644e-06, "loss": 0.0062, "step": 7825 }, { "epoch": 5.421544856252165, "grad_norm": 0.3587585687637329, "learning_rate": 4.579750346740639e-06, "loss": 0.0093, "step": 7826 }, { "epoch": 5.4222376169033595, "grad_norm": 0.4926263689994812, "learning_rate": 4.579056865464633e-06, "loss": 0.0081, "step": 7827 }, { "epoch": 5.422930377554555, "grad_norm": 0.47826123237609863, "learning_rate": 4.578363384188627e-06, "loss": 0.0099, "step": 7828 }, { "epoch": 5.42362313820575, "grad_norm": 0.19716590642929077, "learning_rate": 4.577669902912622e-06, "loss": 0.0051, "step": 7829 }, { "epoch": 5.424315898856945, "grad_norm": 0.22191399335861206, "learning_rate": 4.576976421636617e-06, "loss": 0.0049, "step": 7830 }, { "epoch": 5.42500865950814, "grad_norm": 0.3407326340675354, "learning_rate": 4.576282940360611e-06, "loss": 0.0093, "step": 7831 }, { "epoch": 5.425701420159335, "grad_norm": 0.39827200770378113, "learning_rate": 4.575589459084605e-06, "loss": 0.0132, "step": 7832 }, { "epoch": 5.42639418081053, "grad_norm": 0.3168533146381378, "learning_rate": 4.574895977808599e-06, "loss": 0.0098, "step": 7833 }, { "epoch": 5.427086941461725, "grad_norm": 0.2461700439453125, "learning_rate": 4.574202496532594e-06, "loss": 0.0073, "step": 7834 }, { "epoch": 5.42777970211292, "grad_norm": 0.2408578246831894, "learning_rate": 4.573509015256589e-06, "loss": 0.0052, "step": 7835 }, { "epoch": 5.428472462764115, "grad_norm": 0.38545313477516174, "learning_rate": 4.572815533980583e-06, "loss": 0.0099, "step": 7836 }, { "epoch": 5.42916522341531, "grad_norm": 0.28080132603645325, "learning_rate": 4.572122052704577e-06, "loss": 0.0058, "step": 7837 }, { "epoch": 5.429857984066505, "grad_norm": 0.367831826210022, "learning_rate": 4.571428571428572e-06, "loss": 0.0083, "step": 7838 }, { "epoch": 5.4305507447177, "grad_norm": 0.35450807213783264, "learning_rate": 4.570735090152566e-06, "loss": 0.009, "step": 7839 }, { "epoch": 5.431243505368895, "grad_norm": 0.5089789032936096, "learning_rate": 4.570041608876561e-06, "loss": 0.0099, "step": 7840 }, { "epoch": 5.43193626602009, "grad_norm": 0.42711472511291504, "learning_rate": 4.569348127600555e-06, "loss": 0.0078, "step": 7841 }, { "epoch": 5.432629026671285, "grad_norm": 0.46557801961898804, "learning_rate": 4.56865464632455e-06, "loss": 0.01, "step": 7842 }, { "epoch": 5.43332178732248, "grad_norm": 0.29302334785461426, "learning_rate": 4.567961165048544e-06, "loss": 0.0067, "step": 7843 }, { "epoch": 5.434014547973675, "grad_norm": 0.4012078046798706, "learning_rate": 4.567267683772538e-06, "loss": 0.0063, "step": 7844 }, { "epoch": 5.43470730862487, "grad_norm": 0.5524371266365051, "learning_rate": 4.566574202496533e-06, "loss": 0.0114, "step": 7845 }, { "epoch": 5.435400069276065, "grad_norm": 0.40067043900489807, "learning_rate": 4.565880721220527e-06, "loss": 0.0107, "step": 7846 }, { "epoch": 5.43609282992726, "grad_norm": 0.3479993939399719, "learning_rate": 4.565187239944522e-06, "loss": 0.0094, "step": 7847 }, { "epoch": 5.436785590578455, "grad_norm": 0.42340004444122314, "learning_rate": 4.564493758668516e-06, "loss": 0.0055, "step": 7848 }, { "epoch": 5.43747835122965, "grad_norm": 0.32815006375312805, "learning_rate": 4.56380027739251e-06, "loss": 0.0075, "step": 7849 }, { "epoch": 5.438171111880846, "grad_norm": 0.3321520984172821, "learning_rate": 4.563106796116505e-06, "loss": 0.0084, "step": 7850 }, { "epoch": 5.43886387253204, "grad_norm": 0.3828342854976654, "learning_rate": 4.5624133148405e-06, "loss": 0.0126, "step": 7851 }, { "epoch": 5.439556633183235, "grad_norm": 0.3134111166000366, "learning_rate": 4.561719833564494e-06, "loss": 0.0072, "step": 7852 }, { "epoch": 5.4402493938344305, "grad_norm": 0.3222011923789978, "learning_rate": 4.561026352288488e-06, "loss": 0.0086, "step": 7853 }, { "epoch": 5.440942154485625, "grad_norm": 0.3499179780483246, "learning_rate": 4.560332871012482e-06, "loss": 0.0067, "step": 7854 }, { "epoch": 5.44163491513682, "grad_norm": 0.3237707316875458, "learning_rate": 4.559639389736477e-06, "loss": 0.006, "step": 7855 }, { "epoch": 5.4423276757880155, "grad_norm": 0.3217258155345917, "learning_rate": 4.558945908460472e-06, "loss": 0.0061, "step": 7856 }, { "epoch": 5.44302043643921, "grad_norm": 0.3172626793384552, "learning_rate": 4.558252427184466e-06, "loss": 0.0062, "step": 7857 }, { "epoch": 5.443713197090405, "grad_norm": 0.23633909225463867, "learning_rate": 4.5575589459084605e-06, "loss": 0.0062, "step": 7858 }, { "epoch": 5.4444059577416, "grad_norm": 0.3542218506336212, "learning_rate": 4.556865464632455e-06, "loss": 0.0068, "step": 7859 }, { "epoch": 5.445098718392796, "grad_norm": 0.36565008759498596, "learning_rate": 4.55617198335645e-06, "loss": 0.0063, "step": 7860 }, { "epoch": 5.44579147904399, "grad_norm": 0.4428595304489136, "learning_rate": 4.555478502080444e-06, "loss": 0.0061, "step": 7861 }, { "epoch": 5.446484239695185, "grad_norm": 0.4287349283695221, "learning_rate": 4.5547850208044385e-06, "loss": 0.0068, "step": 7862 }, { "epoch": 5.447177000346381, "grad_norm": 0.3254203200340271, "learning_rate": 4.554091539528433e-06, "loss": 0.0054, "step": 7863 }, { "epoch": 5.447869760997575, "grad_norm": 0.4210508465766907, "learning_rate": 4.5533980582524275e-06, "loss": 0.0091, "step": 7864 }, { "epoch": 5.44856252164877, "grad_norm": 0.36349746584892273, "learning_rate": 4.5527045769764224e-06, "loss": 0.0068, "step": 7865 }, { "epoch": 5.449255282299966, "grad_norm": 0.5324582457542419, "learning_rate": 4.5520110957004165e-06, "loss": 0.01, "step": 7866 }, { "epoch": 5.44994804295116, "grad_norm": 0.5588488578796387, "learning_rate": 4.551317614424411e-06, "loss": 0.0111, "step": 7867 }, { "epoch": 5.450640803602355, "grad_norm": 0.2990497052669525, "learning_rate": 4.5506241331484056e-06, "loss": 0.0084, "step": 7868 }, { "epoch": 5.4513335642535505, "grad_norm": 0.31489789485931396, "learning_rate": 4.5499306518724e-06, "loss": 0.0086, "step": 7869 }, { "epoch": 5.452026324904746, "grad_norm": 0.2952931523323059, "learning_rate": 4.5492371705963946e-06, "loss": 0.0068, "step": 7870 }, { "epoch": 5.45271908555594, "grad_norm": 0.32959821820259094, "learning_rate": 4.548543689320389e-06, "loss": 0.0086, "step": 7871 }, { "epoch": 5.4534118462071355, "grad_norm": 0.44898277521133423, "learning_rate": 4.547850208044383e-06, "loss": 0.0074, "step": 7872 }, { "epoch": 5.454104606858331, "grad_norm": 0.5427644848823547, "learning_rate": 4.547156726768378e-06, "loss": 0.0107, "step": 7873 }, { "epoch": 5.454797367509525, "grad_norm": 0.33111703395843506, "learning_rate": 4.546463245492372e-06, "loss": 0.0064, "step": 7874 }, { "epoch": 5.45549012816072, "grad_norm": 0.20245476067066193, "learning_rate": 4.545769764216367e-06, "loss": 0.0042, "step": 7875 }, { "epoch": 5.456182888811916, "grad_norm": 0.5624719262123108, "learning_rate": 4.545076282940361e-06, "loss": 0.0114, "step": 7876 }, { "epoch": 5.45687564946311, "grad_norm": 0.49390777945518494, "learning_rate": 4.544382801664356e-06, "loss": 0.0105, "step": 7877 }, { "epoch": 5.457568410114305, "grad_norm": 0.34519869089126587, "learning_rate": 4.54368932038835e-06, "loss": 0.0081, "step": 7878 }, { "epoch": 5.458261170765501, "grad_norm": 0.27565476298332214, "learning_rate": 4.542995839112344e-06, "loss": 0.006, "step": 7879 }, { "epoch": 5.458953931416696, "grad_norm": 0.3185814917087555, "learning_rate": 4.542302357836339e-06, "loss": 0.0076, "step": 7880 }, { "epoch": 5.45964669206789, "grad_norm": 0.40839284658432007, "learning_rate": 4.541608876560334e-06, "loss": 0.0084, "step": 7881 }, { "epoch": 5.460339452719086, "grad_norm": 0.39189034700393677, "learning_rate": 4.540915395284328e-06, "loss": 0.0078, "step": 7882 }, { "epoch": 5.461032213370281, "grad_norm": 0.2857482433319092, "learning_rate": 4.540221914008322e-06, "loss": 0.007, "step": 7883 }, { "epoch": 5.461724974021475, "grad_norm": 0.35811492800712585, "learning_rate": 4.539528432732316e-06, "loss": 0.0086, "step": 7884 }, { "epoch": 5.4624177346726706, "grad_norm": 0.3011959195137024, "learning_rate": 4.538834951456311e-06, "loss": 0.0069, "step": 7885 }, { "epoch": 5.463110495323866, "grad_norm": 0.24721817672252655, "learning_rate": 4.538141470180306e-06, "loss": 0.0057, "step": 7886 }, { "epoch": 5.46380325597506, "grad_norm": 0.3397451937198639, "learning_rate": 4.5374479889043e-06, "loss": 0.0075, "step": 7887 }, { "epoch": 5.4644960166262555, "grad_norm": 0.3887694478034973, "learning_rate": 4.536754507628294e-06, "loss": 0.0083, "step": 7888 }, { "epoch": 5.465188777277451, "grad_norm": 0.396060973405838, "learning_rate": 4.536061026352289e-06, "loss": 0.0092, "step": 7889 }, { "epoch": 5.465881537928646, "grad_norm": 0.38492250442504883, "learning_rate": 4.535367545076284e-06, "loss": 0.0064, "step": 7890 }, { "epoch": 5.4665742985798405, "grad_norm": 0.331540584564209, "learning_rate": 4.534674063800278e-06, "loss": 0.0092, "step": 7891 }, { "epoch": 5.467267059231036, "grad_norm": 0.5112343430519104, "learning_rate": 4.533980582524272e-06, "loss": 0.0098, "step": 7892 }, { "epoch": 5.467959819882231, "grad_norm": 0.21132290363311768, "learning_rate": 4.533287101248266e-06, "loss": 0.0051, "step": 7893 }, { "epoch": 5.468652580533425, "grad_norm": 0.2255939096212387, "learning_rate": 4.532593619972261e-06, "loss": 0.0045, "step": 7894 }, { "epoch": 5.469345341184621, "grad_norm": 0.3799256384372711, "learning_rate": 4.531900138696256e-06, "loss": 0.0072, "step": 7895 }, { "epoch": 5.470038101835816, "grad_norm": 0.2559894323348999, "learning_rate": 4.53120665742025e-06, "loss": 0.0064, "step": 7896 }, { "epoch": 5.47073086248701, "grad_norm": 0.27032461762428284, "learning_rate": 4.530513176144244e-06, "loss": 0.0076, "step": 7897 }, { "epoch": 5.471423623138206, "grad_norm": 0.40068286657333374, "learning_rate": 4.529819694868239e-06, "loss": 0.0115, "step": 7898 }, { "epoch": 5.472116383789401, "grad_norm": 0.33502882719039917, "learning_rate": 4.529126213592233e-06, "loss": 0.0078, "step": 7899 }, { "epoch": 5.472809144440596, "grad_norm": 0.3175271153450012, "learning_rate": 4.528432732316228e-06, "loss": 0.0077, "step": 7900 }, { "epoch": 5.473501905091791, "grad_norm": 0.2385866641998291, "learning_rate": 4.527739251040222e-06, "loss": 0.0054, "step": 7901 }, { "epoch": 5.474194665742986, "grad_norm": 0.2599616050720215, "learning_rate": 4.527045769764216e-06, "loss": 0.0054, "step": 7902 }, { "epoch": 5.474887426394181, "grad_norm": 0.30545392632484436, "learning_rate": 4.526352288488211e-06, "loss": 0.007, "step": 7903 }, { "epoch": 5.4755801870453755, "grad_norm": 0.32731249928474426, "learning_rate": 4.525658807212205e-06, "loss": 0.0089, "step": 7904 }, { "epoch": 5.476272947696571, "grad_norm": 0.37971776723861694, "learning_rate": 4.5249653259362e-06, "loss": 0.0076, "step": 7905 }, { "epoch": 5.476965708347766, "grad_norm": 0.40346983075141907, "learning_rate": 4.524271844660194e-06, "loss": 0.0058, "step": 7906 }, { "epoch": 5.4776584689989605, "grad_norm": 0.37472909688949585, "learning_rate": 4.523578363384189e-06, "loss": 0.0075, "step": 7907 }, { "epoch": 5.478351229650156, "grad_norm": 0.6266420483589172, "learning_rate": 4.522884882108183e-06, "loss": 0.0102, "step": 7908 }, { "epoch": 5.479043990301351, "grad_norm": 0.43866774439811707, "learning_rate": 4.5221914008321775e-06, "loss": 0.0077, "step": 7909 }, { "epoch": 5.479736750952546, "grad_norm": 0.3060195744037628, "learning_rate": 4.5214979195561724e-06, "loss": 0.0057, "step": 7910 }, { "epoch": 5.480429511603741, "grad_norm": 0.3488198518753052, "learning_rate": 4.5208044382801665e-06, "loss": 0.0082, "step": 7911 }, { "epoch": 5.481122272254936, "grad_norm": 0.2779553532600403, "learning_rate": 4.5201109570041614e-06, "loss": 0.0057, "step": 7912 }, { "epoch": 5.481815032906131, "grad_norm": 0.20279371738433838, "learning_rate": 4.5194174757281555e-06, "loss": 0.0055, "step": 7913 }, { "epoch": 5.482507793557326, "grad_norm": 0.24038439989089966, "learning_rate": 4.51872399445215e-06, "loss": 0.0046, "step": 7914 }, { "epoch": 5.483200554208521, "grad_norm": 0.28487125039100647, "learning_rate": 4.5180305131761446e-06, "loss": 0.0063, "step": 7915 }, { "epoch": 5.483893314859716, "grad_norm": 0.2544689178466797, "learning_rate": 4.5173370319001395e-06, "loss": 0.007, "step": 7916 }, { "epoch": 5.484586075510911, "grad_norm": 0.3484251797199249, "learning_rate": 4.5166435506241336e-06, "loss": 0.0096, "step": 7917 }, { "epoch": 5.485278836162106, "grad_norm": 0.2558421790599823, "learning_rate": 4.515950069348128e-06, "loss": 0.0055, "step": 7918 }, { "epoch": 5.485971596813301, "grad_norm": 0.28450798988342285, "learning_rate": 4.515256588072123e-06, "loss": 0.006, "step": 7919 }, { "epoch": 5.486664357464496, "grad_norm": 0.30505290627479553, "learning_rate": 4.514563106796117e-06, "loss": 0.0054, "step": 7920 }, { "epoch": 5.487357118115691, "grad_norm": 0.32605981826782227, "learning_rate": 4.513869625520112e-06, "loss": 0.0102, "step": 7921 }, { "epoch": 5.488049878766886, "grad_norm": 0.3867191672325134, "learning_rate": 4.513176144244106e-06, "loss": 0.0097, "step": 7922 }, { "epoch": 5.488742639418081, "grad_norm": 0.33257973194122314, "learning_rate": 4.5124826629681e-06, "loss": 0.0103, "step": 7923 }, { "epoch": 5.489435400069276, "grad_norm": 0.3549240231513977, "learning_rate": 4.511789181692095e-06, "loss": 0.0079, "step": 7924 }, { "epoch": 5.490128160720471, "grad_norm": 0.530772864818573, "learning_rate": 4.51109570041609e-06, "loss": 0.0091, "step": 7925 }, { "epoch": 5.490820921371666, "grad_norm": 0.5482567548751831, "learning_rate": 4.510402219140084e-06, "loss": 0.0077, "step": 7926 }, { "epoch": 5.491513682022861, "grad_norm": 0.24993811547756195, "learning_rate": 4.509708737864078e-06, "loss": 0.0061, "step": 7927 }, { "epoch": 5.492206442674056, "grad_norm": 0.43241819739341736, "learning_rate": 4.509015256588073e-06, "loss": 0.0081, "step": 7928 }, { "epoch": 5.492899203325251, "grad_norm": 0.48345640301704407, "learning_rate": 4.508321775312067e-06, "loss": 0.0136, "step": 7929 }, { "epoch": 5.493591963976446, "grad_norm": 0.2781326472759247, "learning_rate": 4.507628294036062e-06, "loss": 0.006, "step": 7930 }, { "epoch": 5.494284724627641, "grad_norm": 0.3042835593223572, "learning_rate": 4.506934812760056e-06, "loss": 0.0077, "step": 7931 }, { "epoch": 5.494977485278836, "grad_norm": 0.32773521542549133, "learning_rate": 4.50624133148405e-06, "loss": 0.0066, "step": 7932 }, { "epoch": 5.4956702459300315, "grad_norm": 0.41129395365715027, "learning_rate": 4.505547850208045e-06, "loss": 0.0077, "step": 7933 }, { "epoch": 5.496363006581226, "grad_norm": 0.3410695493221283, "learning_rate": 4.504854368932039e-06, "loss": 0.0073, "step": 7934 }, { "epoch": 5.497055767232421, "grad_norm": 0.41703978180885315, "learning_rate": 4.504160887656034e-06, "loss": 0.0084, "step": 7935 }, { "epoch": 5.4977485278836165, "grad_norm": 0.5004681944847107, "learning_rate": 4.503467406380028e-06, "loss": 0.0082, "step": 7936 }, { "epoch": 5.498441288534811, "grad_norm": 0.35817739367485046, "learning_rate": 4.502773925104023e-06, "loss": 0.007, "step": 7937 }, { "epoch": 5.499134049186006, "grad_norm": 0.2824252247810364, "learning_rate": 4.502080443828017e-06, "loss": 0.0046, "step": 7938 }, { "epoch": 5.499826809837201, "grad_norm": 0.3260112404823303, "learning_rate": 4.501386962552011e-06, "loss": 0.007, "step": 7939 }, { "epoch": 5.500519570488397, "grad_norm": 0.2906835079193115, "learning_rate": 4.500693481276006e-06, "loss": 0.0053, "step": 7940 }, { "epoch": 5.501212331139591, "grad_norm": 0.36243653297424316, "learning_rate": 4.5e-06, "loss": 0.0059, "step": 7941 }, { "epoch": 5.501905091790786, "grad_norm": 0.3283509314060211, "learning_rate": 4.499306518723995e-06, "loss": 0.0064, "step": 7942 }, { "epoch": 5.502597852441982, "grad_norm": 0.4903525114059448, "learning_rate": 4.498613037447989e-06, "loss": 0.0085, "step": 7943 }, { "epoch": 5.503290613093176, "grad_norm": 0.48571836948394775, "learning_rate": 4.497919556171983e-06, "loss": 0.0077, "step": 7944 }, { "epoch": 5.503983373744371, "grad_norm": 0.40723055601119995, "learning_rate": 4.497226074895978e-06, "loss": 0.0111, "step": 7945 }, { "epoch": 5.504676134395567, "grad_norm": 0.2866261303424835, "learning_rate": 4.496532593619973e-06, "loss": 0.005, "step": 7946 }, { "epoch": 5.505368895046761, "grad_norm": 0.2458246499300003, "learning_rate": 4.495839112343967e-06, "loss": 0.005, "step": 7947 }, { "epoch": 5.506061655697956, "grad_norm": 0.5143068432807922, "learning_rate": 4.495145631067961e-06, "loss": 0.008, "step": 7948 }, { "epoch": 5.5067544163491515, "grad_norm": 0.32183995842933655, "learning_rate": 4.494452149791955e-06, "loss": 0.0075, "step": 7949 }, { "epoch": 5.507447177000346, "grad_norm": 0.2839590609073639, "learning_rate": 4.49375866851595e-06, "loss": 0.0058, "step": 7950 }, { "epoch": 5.508139937651541, "grad_norm": 0.40156957507133484, "learning_rate": 4.493065187239945e-06, "loss": 0.0053, "step": 7951 }, { "epoch": 5.5088326983027365, "grad_norm": 0.169826939702034, "learning_rate": 4.492371705963939e-06, "loss": 0.0039, "step": 7952 }, { "epoch": 5.509525458953932, "grad_norm": 0.3291435241699219, "learning_rate": 4.491678224687933e-06, "loss": 0.01, "step": 7953 }, { "epoch": 5.510218219605126, "grad_norm": 0.28348952531814575, "learning_rate": 4.490984743411928e-06, "loss": 0.0056, "step": 7954 }, { "epoch": 5.510910980256321, "grad_norm": 0.2896806299686432, "learning_rate": 4.490291262135923e-06, "loss": 0.0072, "step": 7955 }, { "epoch": 5.511603740907517, "grad_norm": 0.30665791034698486, "learning_rate": 4.489597780859917e-06, "loss": 0.0054, "step": 7956 }, { "epoch": 5.512296501558711, "grad_norm": 0.3430306613445282, "learning_rate": 4.4889042995839114e-06, "loss": 0.0065, "step": 7957 }, { "epoch": 5.512989262209906, "grad_norm": 0.40963706374168396, "learning_rate": 4.488210818307906e-06, "loss": 0.0062, "step": 7958 }, { "epoch": 5.513682022861102, "grad_norm": 0.3337137997150421, "learning_rate": 4.4875173370319004e-06, "loss": 0.0076, "step": 7959 }, { "epoch": 5.514374783512297, "grad_norm": 0.39136621356010437, "learning_rate": 4.486823855755895e-06, "loss": 0.008, "step": 7960 }, { "epoch": 5.515067544163491, "grad_norm": 0.35691195726394653, "learning_rate": 4.4861303744798895e-06, "loss": 0.0074, "step": 7961 }, { "epoch": 5.515760304814687, "grad_norm": 0.40752115845680237, "learning_rate": 4.4854368932038836e-06, "loss": 0.0067, "step": 7962 }, { "epoch": 5.516453065465882, "grad_norm": 0.6623293161392212, "learning_rate": 4.4847434119278785e-06, "loss": 0.0132, "step": 7963 }, { "epoch": 5.517145826117076, "grad_norm": 0.2455042600631714, "learning_rate": 4.4840499306518726e-06, "loss": 0.005, "step": 7964 }, { "epoch": 5.5178385867682715, "grad_norm": 0.41237589716911316, "learning_rate": 4.4833564493758675e-06, "loss": 0.0058, "step": 7965 }, { "epoch": 5.518531347419467, "grad_norm": 0.4208453297615051, "learning_rate": 4.482662968099862e-06, "loss": 0.0068, "step": 7966 }, { "epoch": 5.519224108070661, "grad_norm": 0.46869662404060364, "learning_rate": 4.4819694868238565e-06, "loss": 0.0131, "step": 7967 }, { "epoch": 5.5199168687218565, "grad_norm": 0.5133314728736877, "learning_rate": 4.481276005547851e-06, "loss": 0.0109, "step": 7968 }, { "epoch": 5.520609629373052, "grad_norm": 0.28814154863357544, "learning_rate": 4.480582524271845e-06, "loss": 0.0059, "step": 7969 }, { "epoch": 5.521302390024246, "grad_norm": 0.36473405361175537, "learning_rate": 4.47988904299584e-06, "loss": 0.0096, "step": 7970 }, { "epoch": 5.521995150675441, "grad_norm": 0.3517254889011383, "learning_rate": 4.479195561719834e-06, "loss": 0.0085, "step": 7971 }, { "epoch": 5.522687911326637, "grad_norm": 0.2761370837688446, "learning_rate": 4.478502080443829e-06, "loss": 0.0062, "step": 7972 }, { "epoch": 5.523380671977832, "grad_norm": 0.3428029716014862, "learning_rate": 4.477808599167823e-06, "loss": 0.0074, "step": 7973 }, { "epoch": 5.524073432629026, "grad_norm": 0.36943402886390686, "learning_rate": 4.477115117891817e-06, "loss": 0.0084, "step": 7974 }, { "epoch": 5.524766193280222, "grad_norm": 0.48313096165657043, "learning_rate": 4.476421636615812e-06, "loss": 0.011, "step": 7975 }, { "epoch": 5.525458953931417, "grad_norm": 0.32100197672843933, "learning_rate": 4.475728155339807e-06, "loss": 0.0062, "step": 7976 }, { "epoch": 5.526151714582611, "grad_norm": 0.2223215103149414, "learning_rate": 4.475034674063801e-06, "loss": 0.0049, "step": 7977 }, { "epoch": 5.526844475233807, "grad_norm": 0.3528873324394226, "learning_rate": 4.474341192787795e-06, "loss": 0.0075, "step": 7978 }, { "epoch": 5.527537235885002, "grad_norm": 0.29667994379997253, "learning_rate": 4.473647711511789e-06, "loss": 0.0074, "step": 7979 }, { "epoch": 5.528229996536197, "grad_norm": 0.2997491657733917, "learning_rate": 4.472954230235784e-06, "loss": 0.0075, "step": 7980 }, { "epoch": 5.528922757187392, "grad_norm": 0.26284554600715637, "learning_rate": 4.472260748959779e-06, "loss": 0.0059, "step": 7981 }, { "epoch": 5.529615517838587, "grad_norm": 0.4108389616012573, "learning_rate": 4.471567267683773e-06, "loss": 0.0085, "step": 7982 }, { "epoch": 5.530308278489782, "grad_norm": 0.4730110764503479, "learning_rate": 4.470873786407767e-06, "loss": 0.0072, "step": 7983 }, { "epoch": 5.5310010391409765, "grad_norm": 0.5832785367965698, "learning_rate": 4.470180305131762e-06, "loss": 0.0112, "step": 7984 }, { "epoch": 5.531693799792172, "grad_norm": 0.3889367878437042, "learning_rate": 4.469486823855757e-06, "loss": 0.0063, "step": 7985 }, { "epoch": 5.532386560443367, "grad_norm": 0.332314670085907, "learning_rate": 4.468793342579751e-06, "loss": 0.0072, "step": 7986 }, { "epoch": 5.5330793210945615, "grad_norm": 0.32923293113708496, "learning_rate": 4.468099861303745e-06, "loss": 0.0068, "step": 7987 }, { "epoch": 5.533772081745757, "grad_norm": 0.3131104111671448, "learning_rate": 4.467406380027739e-06, "loss": 0.0077, "step": 7988 }, { "epoch": 5.534464842396952, "grad_norm": 0.5339545011520386, "learning_rate": 4.466712898751734e-06, "loss": 0.0098, "step": 7989 }, { "epoch": 5.535157603048146, "grad_norm": 0.32775241136550903, "learning_rate": 4.466019417475729e-06, "loss": 0.0063, "step": 7990 }, { "epoch": 5.535850363699342, "grad_norm": 0.2774968445301056, "learning_rate": 4.465325936199723e-06, "loss": 0.005, "step": 7991 }, { "epoch": 5.536543124350537, "grad_norm": 0.33874380588531494, "learning_rate": 4.464632454923717e-06, "loss": 0.0069, "step": 7992 }, { "epoch": 5.537235885001732, "grad_norm": 0.30800774693489075, "learning_rate": 4.463938973647712e-06, "loss": 0.005, "step": 7993 }, { "epoch": 5.537928645652927, "grad_norm": 0.3378745913505554, "learning_rate": 4.463245492371706e-06, "loss": 0.0076, "step": 7994 }, { "epoch": 5.538621406304122, "grad_norm": 0.3484329879283905, "learning_rate": 4.462552011095701e-06, "loss": 0.007, "step": 7995 }, { "epoch": 5.539314166955317, "grad_norm": 0.3457847833633423, "learning_rate": 4.461858529819695e-06, "loss": 0.0086, "step": 7996 }, { "epoch": 5.540006927606512, "grad_norm": 0.3561868965625763, "learning_rate": 4.461165048543689e-06, "loss": 0.0062, "step": 7997 }, { "epoch": 5.540699688257707, "grad_norm": 0.3368603587150574, "learning_rate": 4.460471567267684e-06, "loss": 0.0083, "step": 7998 }, { "epoch": 5.541392448908902, "grad_norm": 0.46497246623039246, "learning_rate": 4.459778085991678e-06, "loss": 0.006, "step": 7999 }, { "epoch": 5.542085209560097, "grad_norm": 0.5414943695068359, "learning_rate": 4.459084604715673e-06, "loss": 0.0078, "step": 8000 }, { "epoch": 5.542777970211292, "grad_norm": 0.47564202547073364, "learning_rate": 4.458391123439667e-06, "loss": 0.0089, "step": 8001 }, { "epoch": 5.543470730862487, "grad_norm": 0.2951892018318176, "learning_rate": 4.457697642163662e-06, "loss": 0.0056, "step": 8002 }, { "epoch": 5.544163491513682, "grad_norm": 0.24927709996700287, "learning_rate": 4.457004160887656e-06, "loss": 0.0059, "step": 8003 }, { "epoch": 5.544856252164877, "grad_norm": 0.44315189123153687, "learning_rate": 4.4563106796116504e-06, "loss": 0.0095, "step": 8004 }, { "epoch": 5.545549012816072, "grad_norm": 0.44241759181022644, "learning_rate": 4.455617198335645e-06, "loss": 0.0113, "step": 8005 }, { "epoch": 5.546241773467267, "grad_norm": 0.3930674195289612, "learning_rate": 4.45492371705964e-06, "loss": 0.0071, "step": 8006 }, { "epoch": 5.546934534118462, "grad_norm": 0.3861844837665558, "learning_rate": 4.454230235783634e-06, "loss": 0.007, "step": 8007 }, { "epoch": 5.547627294769657, "grad_norm": 0.2811047434806824, "learning_rate": 4.4535367545076285e-06, "loss": 0.0067, "step": 8008 }, { "epoch": 5.548320055420852, "grad_norm": 0.43750813603401184, "learning_rate": 4.4528432732316226e-06, "loss": 0.0077, "step": 8009 }, { "epoch": 5.549012816072047, "grad_norm": 0.26658254861831665, "learning_rate": 4.4521497919556175e-06, "loss": 0.0056, "step": 8010 }, { "epoch": 5.549705576723242, "grad_norm": 0.5692890286445618, "learning_rate": 4.451456310679612e-06, "loss": 0.0111, "step": 8011 }, { "epoch": 5.550398337374437, "grad_norm": 0.4793515205383301, "learning_rate": 4.4507628294036065e-06, "loss": 0.0093, "step": 8012 }, { "epoch": 5.5510910980256325, "grad_norm": 0.34090977907180786, "learning_rate": 4.450069348127601e-06, "loss": 0.0099, "step": 8013 }, { "epoch": 5.551783858676827, "grad_norm": 0.5725679397583008, "learning_rate": 4.4493758668515955e-06, "loss": 0.0099, "step": 8014 }, { "epoch": 5.552476619328022, "grad_norm": 0.30535051226615906, "learning_rate": 4.4486823855755905e-06, "loss": 0.0042, "step": 8015 }, { "epoch": 5.5531693799792174, "grad_norm": 0.45637768507003784, "learning_rate": 4.4479889042995845e-06, "loss": 0.0106, "step": 8016 }, { "epoch": 5.553862140630412, "grad_norm": 0.31168192625045776, "learning_rate": 4.447295423023579e-06, "loss": 0.006, "step": 8017 }, { "epoch": 5.554554901281607, "grad_norm": 0.29973074793815613, "learning_rate": 4.446601941747573e-06, "loss": 0.006, "step": 8018 }, { "epoch": 5.555247661932802, "grad_norm": 0.5181790590286255, "learning_rate": 4.445908460471568e-06, "loss": 0.0067, "step": 8019 }, { "epoch": 5.555940422583998, "grad_norm": 0.573112964630127, "learning_rate": 4.445214979195563e-06, "loss": 0.0104, "step": 8020 }, { "epoch": 5.556633183235192, "grad_norm": 1.0122028589248657, "learning_rate": 4.444521497919557e-06, "loss": 0.0075, "step": 8021 }, { "epoch": 5.557325943886387, "grad_norm": 0.35621264576911926, "learning_rate": 4.443828016643551e-06, "loss": 0.0066, "step": 8022 }, { "epoch": 5.558018704537583, "grad_norm": 0.4715633690357208, "learning_rate": 4.443134535367546e-06, "loss": 0.0085, "step": 8023 }, { "epoch": 5.558711465188777, "grad_norm": 0.5500308275222778, "learning_rate": 4.44244105409154e-06, "loss": 0.0074, "step": 8024 }, { "epoch": 5.559404225839972, "grad_norm": 0.48288464546203613, "learning_rate": 4.441747572815535e-06, "loss": 0.009, "step": 8025 }, { "epoch": 5.560096986491168, "grad_norm": 0.36384445428848267, "learning_rate": 4.441054091539529e-06, "loss": 0.0085, "step": 8026 }, { "epoch": 5.560789747142362, "grad_norm": 0.4242739975452423, "learning_rate": 4.440360610263523e-06, "loss": 0.0125, "step": 8027 }, { "epoch": 5.561482507793557, "grad_norm": 0.30751749873161316, "learning_rate": 4.439667128987518e-06, "loss": 0.0072, "step": 8028 }, { "epoch": 5.5621752684447525, "grad_norm": 0.3378515839576721, "learning_rate": 4.438973647711512e-06, "loss": 0.0083, "step": 8029 }, { "epoch": 5.562868029095947, "grad_norm": 0.4439280331134796, "learning_rate": 4.438280166435507e-06, "loss": 0.0136, "step": 8030 }, { "epoch": 5.563560789747142, "grad_norm": 0.33129703998565674, "learning_rate": 4.437586685159501e-06, "loss": 0.0097, "step": 8031 }, { "epoch": 5.5642535503983375, "grad_norm": 0.25259390473365784, "learning_rate": 4.436893203883496e-06, "loss": 0.0074, "step": 8032 }, { "epoch": 5.564946311049533, "grad_norm": 0.27036571502685547, "learning_rate": 4.43619972260749e-06, "loss": 0.0066, "step": 8033 }, { "epoch": 5.565639071700727, "grad_norm": 0.30866026878356934, "learning_rate": 4.435506241331484e-06, "loss": 0.0066, "step": 8034 }, { "epoch": 5.566331832351922, "grad_norm": 0.3763596713542938, "learning_rate": 4.434812760055479e-06, "loss": 0.0083, "step": 8035 }, { "epoch": 5.567024593003118, "grad_norm": 0.44617846608161926, "learning_rate": 4.434119278779473e-06, "loss": 0.0074, "step": 8036 }, { "epoch": 5.567717353654312, "grad_norm": 0.4959987699985504, "learning_rate": 4.433425797503468e-06, "loss": 0.0088, "step": 8037 }, { "epoch": 5.568410114305507, "grad_norm": 0.41230276226997375, "learning_rate": 4.432732316227462e-06, "loss": 0.0081, "step": 8038 }, { "epoch": 5.569102874956703, "grad_norm": 0.330306738615036, "learning_rate": 4.432038834951456e-06, "loss": 0.007, "step": 8039 }, { "epoch": 5.569795635607898, "grad_norm": 0.33954522013664246, "learning_rate": 4.431345353675451e-06, "loss": 0.0066, "step": 8040 }, { "epoch": 5.570488396259092, "grad_norm": 0.47080641984939575, "learning_rate": 4.430651872399446e-06, "loss": 0.0121, "step": 8041 }, { "epoch": 5.571181156910288, "grad_norm": 0.3344554305076599, "learning_rate": 4.42995839112344e-06, "loss": 0.0079, "step": 8042 }, { "epoch": 5.571873917561483, "grad_norm": 0.4823695421218872, "learning_rate": 4.429264909847434e-06, "loss": 0.0102, "step": 8043 }, { "epoch": 5.572566678212677, "grad_norm": 0.35360637307167053, "learning_rate": 4.428571428571429e-06, "loss": 0.0057, "step": 8044 }, { "epoch": 5.5732594388638725, "grad_norm": 0.31102123856544495, "learning_rate": 4.427877947295424e-06, "loss": 0.0062, "step": 8045 }, { "epoch": 5.573952199515068, "grad_norm": 0.4725441038608551, "learning_rate": 4.427184466019418e-06, "loss": 0.0127, "step": 8046 }, { "epoch": 5.574644960166262, "grad_norm": 0.7464017271995544, "learning_rate": 4.426490984743412e-06, "loss": 0.0073, "step": 8047 }, { "epoch": 5.5753377208174575, "grad_norm": 0.4012281000614166, "learning_rate": 4.425797503467406e-06, "loss": 0.0092, "step": 8048 }, { "epoch": 5.576030481468653, "grad_norm": 0.27611708641052246, "learning_rate": 4.425104022191401e-06, "loss": 0.0062, "step": 8049 }, { "epoch": 5.576723242119847, "grad_norm": 0.3412626385688782, "learning_rate": 4.424410540915396e-06, "loss": 0.0063, "step": 8050 }, { "epoch": 5.577416002771042, "grad_norm": 0.3446902930736542, "learning_rate": 4.42371705963939e-06, "loss": 0.0066, "step": 8051 }, { "epoch": 5.578108763422238, "grad_norm": 0.30243048071861267, "learning_rate": 4.423023578363384e-06, "loss": 0.0075, "step": 8052 }, { "epoch": 5.578801524073433, "grad_norm": 0.316084623336792, "learning_rate": 4.422330097087379e-06, "loss": 0.0095, "step": 8053 }, { "epoch": 5.579494284724627, "grad_norm": 0.36083683371543884, "learning_rate": 4.421636615811373e-06, "loss": 0.0088, "step": 8054 }, { "epoch": 5.580187045375823, "grad_norm": 0.4106943905353546, "learning_rate": 4.420943134535368e-06, "loss": 0.0079, "step": 8055 }, { "epoch": 5.580879806027018, "grad_norm": 0.465559184551239, "learning_rate": 4.420249653259362e-06, "loss": 0.007, "step": 8056 }, { "epoch": 5.581572566678212, "grad_norm": 0.3268836438655853, "learning_rate": 4.4195561719833565e-06, "loss": 0.0065, "step": 8057 }, { "epoch": 5.582265327329408, "grad_norm": 0.458279550075531, "learning_rate": 4.418862690707351e-06, "loss": 0.0106, "step": 8058 }, { "epoch": 5.582958087980603, "grad_norm": 0.2828100323677063, "learning_rate": 4.4181692094313455e-06, "loss": 0.0056, "step": 8059 }, { "epoch": 5.583650848631798, "grad_norm": 0.5296974778175354, "learning_rate": 4.4174757281553404e-06, "loss": 0.0066, "step": 8060 }, { "epoch": 5.584343609282993, "grad_norm": 0.43544599413871765, "learning_rate": 4.4167822468793345e-06, "loss": 0.0104, "step": 8061 }, { "epoch": 5.585036369934188, "grad_norm": 0.3860195577144623, "learning_rate": 4.4160887656033295e-06, "loss": 0.0083, "step": 8062 }, { "epoch": 5.585729130585383, "grad_norm": 0.3217155933380127, "learning_rate": 4.4153952843273235e-06, "loss": 0.0071, "step": 8063 }, { "epoch": 5.5864218912365775, "grad_norm": 0.43554022908210754, "learning_rate": 4.414701803051318e-06, "loss": 0.0071, "step": 8064 }, { "epoch": 5.587114651887773, "grad_norm": 0.34503835439682007, "learning_rate": 4.4140083217753126e-06, "loss": 0.0094, "step": 8065 }, { "epoch": 5.587807412538968, "grad_norm": 0.33823269605636597, "learning_rate": 4.413314840499307e-06, "loss": 0.0071, "step": 8066 }, { "epoch": 5.5885001731901625, "grad_norm": 0.35930171608924866, "learning_rate": 4.412621359223302e-06, "loss": 0.0078, "step": 8067 }, { "epoch": 5.589192933841358, "grad_norm": 0.5095252394676208, "learning_rate": 4.411927877947296e-06, "loss": 0.0087, "step": 8068 }, { "epoch": 5.589885694492553, "grad_norm": 0.28732743859291077, "learning_rate": 4.41123439667129e-06, "loss": 0.0063, "step": 8069 }, { "epoch": 5.590578455143747, "grad_norm": 0.37983229756355286, "learning_rate": 4.410540915395285e-06, "loss": 0.0079, "step": 8070 }, { "epoch": 5.591271215794943, "grad_norm": 0.5520676374435425, "learning_rate": 4.40984743411928e-06, "loss": 0.0136, "step": 8071 }, { "epoch": 5.591963976446138, "grad_norm": 0.2988365590572357, "learning_rate": 4.409153952843274e-06, "loss": 0.0057, "step": 8072 }, { "epoch": 5.592656737097333, "grad_norm": 0.33575090765953064, "learning_rate": 4.408460471567268e-06, "loss": 0.0101, "step": 8073 }, { "epoch": 5.593349497748528, "grad_norm": 0.2537892758846283, "learning_rate": 4.407766990291263e-06, "loss": 0.0049, "step": 8074 }, { "epoch": 5.594042258399723, "grad_norm": 0.31315284967422485, "learning_rate": 4.407073509015257e-06, "loss": 0.0073, "step": 8075 }, { "epoch": 5.594735019050918, "grad_norm": 0.5253440737724304, "learning_rate": 4.406380027739252e-06, "loss": 0.0085, "step": 8076 }, { "epoch": 5.595427779702113, "grad_norm": 0.5795570015907288, "learning_rate": 4.405686546463246e-06, "loss": 0.0092, "step": 8077 }, { "epoch": 5.596120540353308, "grad_norm": 0.3438153564929962, "learning_rate": 4.40499306518724e-06, "loss": 0.0061, "step": 8078 }, { "epoch": 5.596813301004503, "grad_norm": 0.33880171179771423, "learning_rate": 4.404299583911235e-06, "loss": 0.0084, "step": 8079 }, { "epoch": 5.597506061655698, "grad_norm": 0.3033163547515869, "learning_rate": 4.40360610263523e-06, "loss": 0.0056, "step": 8080 }, { "epoch": 5.598198822306893, "grad_norm": 0.28660401701927185, "learning_rate": 4.402912621359224e-06, "loss": 0.0071, "step": 8081 }, { "epoch": 5.598891582958088, "grad_norm": 0.22764852643013, "learning_rate": 4.402219140083218e-06, "loss": 0.0069, "step": 8082 }, { "epoch": 5.599584343609283, "grad_norm": 0.28526607155799866, "learning_rate": 4.401525658807213e-06, "loss": 0.0064, "step": 8083 }, { "epoch": 5.600277104260478, "grad_norm": 0.36071881651878357, "learning_rate": 4.400832177531207e-06, "loss": 0.0082, "step": 8084 }, { "epoch": 5.600969864911673, "grad_norm": 0.3269389569759369, "learning_rate": 4.400138696255202e-06, "loss": 0.0079, "step": 8085 }, { "epoch": 5.601662625562868, "grad_norm": 0.3415747284889221, "learning_rate": 4.399445214979196e-06, "loss": 0.0078, "step": 8086 }, { "epoch": 5.602355386214063, "grad_norm": 0.23844791948795319, "learning_rate": 4.39875173370319e-06, "loss": 0.0058, "step": 8087 }, { "epoch": 5.603048146865258, "grad_norm": 0.49624690413475037, "learning_rate": 4.398058252427185e-06, "loss": 0.0078, "step": 8088 }, { "epoch": 5.603740907516453, "grad_norm": 0.42024341225624084, "learning_rate": 4.397364771151179e-06, "loss": 0.0101, "step": 8089 }, { "epoch": 5.604433668167648, "grad_norm": 0.4434609115123749, "learning_rate": 4.396671289875174e-06, "loss": 0.0077, "step": 8090 }, { "epoch": 5.605126428818843, "grad_norm": 0.37759512662887573, "learning_rate": 4.395977808599168e-06, "loss": 0.009, "step": 8091 }, { "epoch": 5.605819189470038, "grad_norm": 0.39756596088409424, "learning_rate": 4.395284327323163e-06, "loss": 0.0085, "step": 8092 }, { "epoch": 5.6065119501212335, "grad_norm": 0.33069220185279846, "learning_rate": 4.394590846047157e-06, "loss": 0.0066, "step": 8093 }, { "epoch": 5.607204710772428, "grad_norm": 0.3371417224407196, "learning_rate": 4.393897364771151e-06, "loss": 0.0044, "step": 8094 }, { "epoch": 5.607897471423623, "grad_norm": 0.3080448508262634, "learning_rate": 4.393203883495146e-06, "loss": 0.0071, "step": 8095 }, { "epoch": 5.608590232074818, "grad_norm": 0.30030423402786255, "learning_rate": 4.39251040221914e-06, "loss": 0.0049, "step": 8096 }, { "epoch": 5.609282992726013, "grad_norm": 0.4218898117542267, "learning_rate": 4.391816920943135e-06, "loss": 0.0093, "step": 8097 }, { "epoch": 5.609975753377208, "grad_norm": 0.3682861030101776, "learning_rate": 4.391123439667129e-06, "loss": 0.0069, "step": 8098 }, { "epoch": 5.610668514028403, "grad_norm": 0.5085688829421997, "learning_rate": 4.390429958391123e-06, "loss": 0.0086, "step": 8099 }, { "epoch": 5.611361274679599, "grad_norm": 0.37401247024536133, "learning_rate": 4.389736477115118e-06, "loss": 0.0081, "step": 8100 }, { "epoch": 5.612054035330793, "grad_norm": 0.3405166566371918, "learning_rate": 4.389042995839113e-06, "loss": 0.0063, "step": 8101 }, { "epoch": 5.612746795981988, "grad_norm": 0.279582679271698, "learning_rate": 4.388349514563107e-06, "loss": 0.007, "step": 8102 }, { "epoch": 5.613439556633184, "grad_norm": 0.2885138690471649, "learning_rate": 4.387656033287101e-06, "loss": 0.0063, "step": 8103 }, { "epoch": 5.614132317284378, "grad_norm": 0.2857093811035156, "learning_rate": 4.3869625520110955e-06, "loss": 0.0055, "step": 8104 }, { "epoch": 5.614825077935573, "grad_norm": 0.3629360496997833, "learning_rate": 4.38626907073509e-06, "loss": 0.0074, "step": 8105 }, { "epoch": 5.615517838586769, "grad_norm": 0.8724383115768433, "learning_rate": 4.385575589459085e-06, "loss": 0.0094, "step": 8106 }, { "epoch": 5.616210599237963, "grad_norm": 0.48356372117996216, "learning_rate": 4.3848821081830794e-06, "loss": 0.0081, "step": 8107 }, { "epoch": 5.616903359889158, "grad_norm": 0.299801230430603, "learning_rate": 4.3841886269070735e-06, "loss": 0.008, "step": 8108 }, { "epoch": 5.6175961205403535, "grad_norm": 0.28150179982185364, "learning_rate": 4.3834951456310685e-06, "loss": 0.0053, "step": 8109 }, { "epoch": 5.618288881191548, "grad_norm": 0.320665180683136, "learning_rate": 4.382801664355063e-06, "loss": 0.0057, "step": 8110 }, { "epoch": 5.618981641842743, "grad_norm": 0.35002484917640686, "learning_rate": 4.3821081830790575e-06, "loss": 0.006, "step": 8111 }, { "epoch": 5.6196744024939385, "grad_norm": 0.31823593378067017, "learning_rate": 4.3814147018030516e-06, "loss": 0.008, "step": 8112 }, { "epoch": 5.620367163145134, "grad_norm": 0.3462357521057129, "learning_rate": 4.380721220527046e-06, "loss": 0.0056, "step": 8113 }, { "epoch": 5.621059923796328, "grad_norm": 0.3695269525051117, "learning_rate": 4.380027739251041e-06, "loss": 0.0099, "step": 8114 }, { "epoch": 5.621752684447523, "grad_norm": 0.4130893349647522, "learning_rate": 4.3793342579750355e-06, "loss": 0.0091, "step": 8115 }, { "epoch": 5.622445445098719, "grad_norm": 0.40944939851760864, "learning_rate": 4.37864077669903e-06, "loss": 0.0083, "step": 8116 }, { "epoch": 5.623138205749913, "grad_norm": 0.6065247654914856, "learning_rate": 4.377947295423024e-06, "loss": 0.0092, "step": 8117 }, { "epoch": 5.623830966401108, "grad_norm": 0.4101960062980652, "learning_rate": 4.377253814147019e-06, "loss": 0.0083, "step": 8118 }, { "epoch": 5.624523727052304, "grad_norm": 0.35071033239364624, "learning_rate": 4.376560332871013e-06, "loss": 0.0058, "step": 8119 }, { "epoch": 5.625216487703499, "grad_norm": 0.34921368956565857, "learning_rate": 4.375866851595008e-06, "loss": 0.0091, "step": 8120 }, { "epoch": 5.625909248354693, "grad_norm": 0.3462977409362793, "learning_rate": 4.375173370319002e-06, "loss": 0.0061, "step": 8121 }, { "epoch": 5.626602009005889, "grad_norm": 0.35768160223960876, "learning_rate": 4.374479889042997e-06, "loss": 0.0085, "step": 8122 }, { "epoch": 5.627294769657084, "grad_norm": 0.41447946429252625, "learning_rate": 4.373786407766991e-06, "loss": 0.0059, "step": 8123 }, { "epoch": 5.627987530308278, "grad_norm": 0.37148943543434143, "learning_rate": 4.373092926490985e-06, "loss": 0.0072, "step": 8124 }, { "epoch": 5.6286802909594735, "grad_norm": 0.34060126543045044, "learning_rate": 4.372399445214979e-06, "loss": 0.0083, "step": 8125 }, { "epoch": 5.629373051610669, "grad_norm": 0.3396386206150055, "learning_rate": 4.371705963938974e-06, "loss": 0.0099, "step": 8126 }, { "epoch": 5.630065812261863, "grad_norm": 0.3414992690086365, "learning_rate": 4.371012482662969e-06, "loss": 0.0092, "step": 8127 }, { "epoch": 5.6307585729130585, "grad_norm": 0.487488329410553, "learning_rate": 4.370319001386963e-06, "loss": 0.0101, "step": 8128 }, { "epoch": 5.631451333564254, "grad_norm": 0.3113594353199005, "learning_rate": 4.369625520110957e-06, "loss": 0.0072, "step": 8129 }, { "epoch": 5.632144094215448, "grad_norm": 0.3420415222644806, "learning_rate": 4.368932038834952e-06, "loss": 0.0083, "step": 8130 }, { "epoch": 5.632836854866643, "grad_norm": 0.5112042427062988, "learning_rate": 4.368238557558947e-06, "loss": 0.0064, "step": 8131 }, { "epoch": 5.633529615517839, "grad_norm": 0.27378049492836, "learning_rate": 4.367545076282941e-06, "loss": 0.0073, "step": 8132 }, { "epoch": 5.634222376169034, "grad_norm": 0.35176554322242737, "learning_rate": 4.366851595006935e-06, "loss": 0.0116, "step": 8133 }, { "epoch": 5.634915136820228, "grad_norm": 0.40267062187194824, "learning_rate": 4.366158113730929e-06, "loss": 0.0086, "step": 8134 }, { "epoch": 5.635607897471424, "grad_norm": 0.35776427388191223, "learning_rate": 4.365464632454924e-06, "loss": 0.0102, "step": 8135 }, { "epoch": 5.636300658122619, "grad_norm": 0.34870702028274536, "learning_rate": 4.364771151178919e-06, "loss": 0.0065, "step": 8136 }, { "epoch": 5.636993418773813, "grad_norm": 0.42673560976982117, "learning_rate": 4.364077669902913e-06, "loss": 0.007, "step": 8137 }, { "epoch": 5.637686179425009, "grad_norm": 0.27707332372665405, "learning_rate": 4.363384188626907e-06, "loss": 0.0068, "step": 8138 }, { "epoch": 5.638378940076204, "grad_norm": 0.2589033842086792, "learning_rate": 4.362690707350902e-06, "loss": 0.0053, "step": 8139 }, { "epoch": 5.639071700727399, "grad_norm": 0.5663381218910217, "learning_rate": 4.361997226074896e-06, "loss": 0.0111, "step": 8140 }, { "epoch": 5.6397644613785936, "grad_norm": 0.5260714888572693, "learning_rate": 4.361303744798891e-06, "loss": 0.0088, "step": 8141 }, { "epoch": 5.640457222029789, "grad_norm": 0.3698810338973999, "learning_rate": 4.360610263522885e-06, "loss": 0.0097, "step": 8142 }, { "epoch": 5.641149982680984, "grad_norm": 0.4451572000980377, "learning_rate": 4.359916782246879e-06, "loss": 0.008, "step": 8143 }, { "epoch": 5.6418427433321785, "grad_norm": 0.4416818916797638, "learning_rate": 4.359223300970874e-06, "loss": 0.0076, "step": 8144 }, { "epoch": 5.642535503983374, "grad_norm": 0.4071839451789856, "learning_rate": 4.358529819694868e-06, "loss": 0.007, "step": 8145 }, { "epoch": 5.643228264634569, "grad_norm": 0.38316574692726135, "learning_rate": 4.357836338418863e-06, "loss": 0.0076, "step": 8146 }, { "epoch": 5.6439210252857634, "grad_norm": 0.3717910945415497, "learning_rate": 4.357142857142857e-06, "loss": 0.0078, "step": 8147 }, { "epoch": 5.644613785936959, "grad_norm": 0.2634609639644623, "learning_rate": 4.356449375866852e-06, "loss": 0.0061, "step": 8148 }, { "epoch": 5.645306546588154, "grad_norm": 0.41189005970954895, "learning_rate": 4.355755894590846e-06, "loss": 0.0099, "step": 8149 }, { "epoch": 5.645999307239348, "grad_norm": 0.41990867257118225, "learning_rate": 4.35506241331484e-06, "loss": 0.0067, "step": 8150 }, { "epoch": 5.646692067890544, "grad_norm": 0.31547811627388, "learning_rate": 4.354368932038835e-06, "loss": 0.0064, "step": 8151 }, { "epoch": 5.647384828541739, "grad_norm": 0.39213982224464417, "learning_rate": 4.353675450762829e-06, "loss": 0.0071, "step": 8152 }, { "epoch": 5.648077589192933, "grad_norm": 0.2914406657218933, "learning_rate": 4.352981969486824e-06, "loss": 0.0042, "step": 8153 }, { "epoch": 5.648770349844129, "grad_norm": 0.38466131687164307, "learning_rate": 4.3522884882108184e-06, "loss": 0.0127, "step": 8154 }, { "epoch": 5.649463110495324, "grad_norm": 0.3522290289402008, "learning_rate": 4.3515950069348125e-06, "loss": 0.0057, "step": 8155 }, { "epoch": 5.650155871146519, "grad_norm": 0.4373476803302765, "learning_rate": 4.3509015256588075e-06, "loss": 0.009, "step": 8156 }, { "epoch": 5.650848631797714, "grad_norm": 0.30540671944618225, "learning_rate": 4.350208044382802e-06, "loss": 0.0074, "step": 8157 }, { "epoch": 5.651541392448909, "grad_norm": 0.4660751223564148, "learning_rate": 4.3495145631067965e-06, "loss": 0.0086, "step": 8158 }, { "epoch": 5.652234153100104, "grad_norm": 0.41871729493141174, "learning_rate": 4.3488210818307906e-06, "loss": 0.0091, "step": 8159 }, { "epoch": 5.652926913751299, "grad_norm": 0.43146413564682007, "learning_rate": 4.3481276005547855e-06, "loss": 0.0098, "step": 8160 }, { "epoch": 5.653619674402494, "grad_norm": 0.4287135899066925, "learning_rate": 4.34743411927878e-06, "loss": 0.0091, "step": 8161 }, { "epoch": 5.654312435053689, "grad_norm": 0.3094720244407654, "learning_rate": 4.3467406380027745e-06, "loss": 0.0058, "step": 8162 }, { "epoch": 5.655005195704884, "grad_norm": 0.7001898288726807, "learning_rate": 4.346047156726769e-06, "loss": 0.0068, "step": 8163 }, { "epoch": 5.655697956356079, "grad_norm": 0.28040841221809387, "learning_rate": 4.345353675450763e-06, "loss": 0.0064, "step": 8164 }, { "epoch": 5.656390717007274, "grad_norm": 0.4943999648094177, "learning_rate": 4.344660194174758e-06, "loss": 0.0097, "step": 8165 }, { "epoch": 5.657083477658469, "grad_norm": 0.2820316553115845, "learning_rate": 4.3439667128987526e-06, "loss": 0.0052, "step": 8166 }, { "epoch": 5.657776238309664, "grad_norm": 0.4877709746360779, "learning_rate": 4.343273231622747e-06, "loss": 0.0085, "step": 8167 }, { "epoch": 5.658468998960859, "grad_norm": 0.36580315232276917, "learning_rate": 4.342579750346741e-06, "loss": 0.0068, "step": 8168 }, { "epoch": 5.659161759612054, "grad_norm": 0.5931958556175232, "learning_rate": 4.341886269070736e-06, "loss": 0.0078, "step": 8169 }, { "epoch": 5.659854520263249, "grad_norm": 0.3309081494808197, "learning_rate": 4.34119278779473e-06, "loss": 0.0093, "step": 8170 }, { "epoch": 5.660547280914444, "grad_norm": 0.4991997182369232, "learning_rate": 4.340499306518725e-06, "loss": 0.0069, "step": 8171 }, { "epoch": 5.661240041565639, "grad_norm": 0.23241691291332245, "learning_rate": 4.339805825242719e-06, "loss": 0.0047, "step": 8172 }, { "epoch": 5.661932802216834, "grad_norm": 0.3474172353744507, "learning_rate": 4.339112343966713e-06, "loss": 0.0059, "step": 8173 }, { "epoch": 5.662625562868029, "grad_norm": 0.3061229884624481, "learning_rate": 4.338418862690708e-06, "loss": 0.0073, "step": 8174 }, { "epoch": 5.663318323519224, "grad_norm": 0.4371403753757477, "learning_rate": 4.337725381414702e-06, "loss": 0.0078, "step": 8175 }, { "epoch": 5.664011084170419, "grad_norm": 0.3339928090572357, "learning_rate": 4.337031900138697e-06, "loss": 0.0105, "step": 8176 }, { "epoch": 5.664703844821614, "grad_norm": 0.5151131749153137, "learning_rate": 4.336338418862691e-06, "loss": 0.0104, "step": 8177 }, { "epoch": 5.665396605472809, "grad_norm": 0.33382347226142883, "learning_rate": 4.335644937586686e-06, "loss": 0.0078, "step": 8178 }, { "epoch": 5.666089366124004, "grad_norm": 0.33786681294441223, "learning_rate": 4.33495145631068e-06, "loss": 0.0073, "step": 8179 }, { "epoch": 5.6667821267752, "grad_norm": 0.32782819867134094, "learning_rate": 4.334257975034674e-06, "loss": 0.0081, "step": 8180 }, { "epoch": 5.667474887426394, "grad_norm": 0.32676103711128235, "learning_rate": 4.333564493758669e-06, "loss": 0.0067, "step": 8181 }, { "epoch": 5.668167648077589, "grad_norm": 0.44742289185523987, "learning_rate": 4.332871012482663e-06, "loss": 0.0072, "step": 8182 }, { "epoch": 5.668860408728785, "grad_norm": 0.6208932995796204, "learning_rate": 4.332177531206658e-06, "loss": 0.0094, "step": 8183 }, { "epoch": 5.669553169379979, "grad_norm": 0.43178027868270874, "learning_rate": 4.331484049930652e-06, "loss": 0.0078, "step": 8184 }, { "epoch": 5.670245930031174, "grad_norm": 0.32541170716285706, "learning_rate": 4.330790568654646e-06, "loss": 0.0072, "step": 8185 }, { "epoch": 5.6709386906823696, "grad_norm": 0.3351057469844818, "learning_rate": 4.330097087378641e-06, "loss": 0.0082, "step": 8186 }, { "epoch": 5.671631451333564, "grad_norm": 0.4671008586883545, "learning_rate": 4.329403606102636e-06, "loss": 0.0093, "step": 8187 }, { "epoch": 5.672324211984759, "grad_norm": 0.4235295057296753, "learning_rate": 4.32871012482663e-06, "loss": 0.0082, "step": 8188 }, { "epoch": 5.6730169726359545, "grad_norm": 0.30958467721939087, "learning_rate": 4.328016643550624e-06, "loss": 0.0068, "step": 8189 }, { "epoch": 5.673709733287149, "grad_norm": 0.2938699424266815, "learning_rate": 4.327323162274619e-06, "loss": 0.0057, "step": 8190 }, { "epoch": 5.674402493938344, "grad_norm": 0.3453691303730011, "learning_rate": 4.326629680998613e-06, "loss": 0.0075, "step": 8191 }, { "epoch": 5.6750952545895395, "grad_norm": 0.2740063965320587, "learning_rate": 4.325936199722608e-06, "loss": 0.0062, "step": 8192 }, { "epoch": 5.675788015240734, "grad_norm": 0.4536712169647217, "learning_rate": 4.325242718446602e-06, "loss": 0.0118, "step": 8193 }, { "epoch": 5.676480775891929, "grad_norm": 0.3655270040035248, "learning_rate": 4.324549237170596e-06, "loss": 0.0072, "step": 8194 }, { "epoch": 5.677173536543124, "grad_norm": 0.2964143753051758, "learning_rate": 4.323855755894591e-06, "loss": 0.0065, "step": 8195 }, { "epoch": 5.67786629719432, "grad_norm": 0.3953753709793091, "learning_rate": 4.323162274618586e-06, "loss": 0.0057, "step": 8196 }, { "epoch": 5.678559057845514, "grad_norm": 0.230918750166893, "learning_rate": 4.32246879334258e-06, "loss": 0.0054, "step": 8197 }, { "epoch": 5.679251818496709, "grad_norm": 0.32712507247924805, "learning_rate": 4.321775312066574e-06, "loss": 0.0086, "step": 8198 }, { "epoch": 5.679944579147905, "grad_norm": 0.24037069082260132, "learning_rate": 4.321081830790569e-06, "loss": 0.0054, "step": 8199 }, { "epoch": 5.6806373397991, "grad_norm": 0.34376099705696106, "learning_rate": 4.320388349514563e-06, "loss": 0.0065, "step": 8200 }, { "epoch": 5.681330100450294, "grad_norm": 0.4202915132045746, "learning_rate": 4.319694868238558e-06, "loss": 0.0061, "step": 8201 }, { "epoch": 5.68202286110149, "grad_norm": 0.3023999333381653, "learning_rate": 4.319001386962552e-06, "loss": 0.0069, "step": 8202 }, { "epoch": 5.682715621752685, "grad_norm": 0.281676709651947, "learning_rate": 4.3183079056865465e-06, "loss": 0.0056, "step": 8203 }, { "epoch": 5.683408382403879, "grad_norm": 0.41399192810058594, "learning_rate": 4.317614424410541e-06, "loss": 0.0104, "step": 8204 }, { "epoch": 5.6841011430550745, "grad_norm": 0.349400132894516, "learning_rate": 4.3169209431345355e-06, "loss": 0.0065, "step": 8205 }, { "epoch": 5.68479390370627, "grad_norm": 0.25694534182548523, "learning_rate": 4.31622746185853e-06, "loss": 0.0065, "step": 8206 }, { "epoch": 5.685486664357464, "grad_norm": 0.3727432191371918, "learning_rate": 4.3155339805825245e-06, "loss": 0.0103, "step": 8207 }, { "epoch": 5.6861794250086595, "grad_norm": 0.3524590730667114, "learning_rate": 4.3148404993065194e-06, "loss": 0.0075, "step": 8208 }, { "epoch": 5.686872185659855, "grad_norm": 0.35577112436294556, "learning_rate": 4.3141470180305135e-06, "loss": 0.0099, "step": 8209 }, { "epoch": 5.687564946311049, "grad_norm": 0.48710888624191284, "learning_rate": 4.313453536754508e-06, "loss": 0.0126, "step": 8210 }, { "epoch": 5.688257706962244, "grad_norm": 0.3545190095901489, "learning_rate": 4.3127600554785025e-06, "loss": 0.0102, "step": 8211 }, { "epoch": 5.68895046761344, "grad_norm": 0.3064030110836029, "learning_rate": 4.312066574202497e-06, "loss": 0.0066, "step": 8212 }, { "epoch": 5.689643228264634, "grad_norm": 0.5038630366325378, "learning_rate": 4.3113730929264916e-06, "loss": 0.0075, "step": 8213 }, { "epoch": 5.690335988915829, "grad_norm": 0.29983583092689514, "learning_rate": 4.310679611650486e-06, "loss": 0.0057, "step": 8214 }, { "epoch": 5.691028749567025, "grad_norm": 0.6348174810409546, "learning_rate": 4.30998613037448e-06, "loss": 0.0089, "step": 8215 }, { "epoch": 5.69172151021822, "grad_norm": 0.26088616251945496, "learning_rate": 4.309292649098475e-06, "loss": 0.0062, "step": 8216 }, { "epoch": 5.692414270869414, "grad_norm": 0.3356310725212097, "learning_rate": 4.30859916782247e-06, "loss": 0.0072, "step": 8217 }, { "epoch": 5.69310703152061, "grad_norm": 0.36710891127586365, "learning_rate": 4.307905686546464e-06, "loss": 0.0073, "step": 8218 }, { "epoch": 5.693799792171805, "grad_norm": 0.3252546489238739, "learning_rate": 4.307212205270458e-06, "loss": 0.0064, "step": 8219 }, { "epoch": 5.694492552822999, "grad_norm": 0.45620712637901306, "learning_rate": 4.306518723994452e-06, "loss": 0.0126, "step": 8220 }, { "epoch": 5.6951853134741945, "grad_norm": 0.21465623378753662, "learning_rate": 4.305825242718447e-06, "loss": 0.0047, "step": 8221 }, { "epoch": 5.69587807412539, "grad_norm": 0.40564826130867004, "learning_rate": 4.305131761442442e-06, "loss": 0.0074, "step": 8222 }, { "epoch": 5.696570834776585, "grad_norm": 0.3543236255645752, "learning_rate": 4.304438280166436e-06, "loss": 0.0066, "step": 8223 }, { "epoch": 5.6972635954277795, "grad_norm": 0.29081037640571594, "learning_rate": 4.30374479889043e-06, "loss": 0.0065, "step": 8224 }, { "epoch": 5.697956356078975, "grad_norm": 0.26856741309165955, "learning_rate": 4.303051317614425e-06, "loss": 0.006, "step": 8225 }, { "epoch": 5.69864911673017, "grad_norm": 0.42368850111961365, "learning_rate": 4.30235783633842e-06, "loss": 0.0066, "step": 8226 }, { "epoch": 5.699341877381364, "grad_norm": 0.3613841235637665, "learning_rate": 4.301664355062414e-06, "loss": 0.0077, "step": 8227 }, { "epoch": 5.70003463803256, "grad_norm": 0.314153254032135, "learning_rate": 4.300970873786408e-06, "loss": 0.0107, "step": 8228 }, { "epoch": 5.700727398683755, "grad_norm": 0.3639216125011444, "learning_rate": 4.300277392510402e-06, "loss": 0.0095, "step": 8229 }, { "epoch": 5.701420159334949, "grad_norm": 0.3123050630092621, "learning_rate": 4.299583911234397e-06, "loss": 0.0071, "step": 8230 }, { "epoch": 5.702112919986145, "grad_norm": 0.31937894225120544, "learning_rate": 4.298890429958392e-06, "loss": 0.0062, "step": 8231 }, { "epoch": 5.70280568063734, "grad_norm": 0.43743598461151123, "learning_rate": 4.298196948682386e-06, "loss": 0.0144, "step": 8232 }, { "epoch": 5.703498441288534, "grad_norm": 0.4170362055301666, "learning_rate": 4.29750346740638e-06, "loss": 0.007, "step": 8233 }, { "epoch": 5.70419120193973, "grad_norm": 0.2906656861305237, "learning_rate": 4.296809986130375e-06, "loss": 0.0053, "step": 8234 }, { "epoch": 5.704883962590925, "grad_norm": 0.2628159821033478, "learning_rate": 4.296116504854369e-06, "loss": 0.0054, "step": 8235 }, { "epoch": 5.70557672324212, "grad_norm": 0.27946367859840393, "learning_rate": 4.295423023578364e-06, "loss": 0.0055, "step": 8236 }, { "epoch": 5.706269483893315, "grad_norm": 0.40050429105758667, "learning_rate": 4.294729542302358e-06, "loss": 0.008, "step": 8237 }, { "epoch": 5.70696224454451, "grad_norm": 0.6546024680137634, "learning_rate": 4.294036061026353e-06, "loss": 0.0105, "step": 8238 }, { "epoch": 5.707655005195705, "grad_norm": 0.5611664056777954, "learning_rate": 4.293342579750347e-06, "loss": 0.0107, "step": 8239 }, { "epoch": 5.7083477658468995, "grad_norm": 0.25876954197883606, "learning_rate": 4.292649098474341e-06, "loss": 0.0054, "step": 8240 }, { "epoch": 5.709040526498095, "grad_norm": 0.38433337211608887, "learning_rate": 4.291955617198336e-06, "loss": 0.008, "step": 8241 }, { "epoch": 5.70973328714929, "grad_norm": 0.294488787651062, "learning_rate": 4.29126213592233e-06, "loss": 0.0062, "step": 8242 }, { "epoch": 5.710426047800485, "grad_norm": 0.3941182494163513, "learning_rate": 4.290568654646325e-06, "loss": 0.0076, "step": 8243 }, { "epoch": 5.71111880845168, "grad_norm": 0.4684038460254669, "learning_rate": 4.289875173370319e-06, "loss": 0.0086, "step": 8244 }, { "epoch": 5.711811569102875, "grad_norm": 0.2668977975845337, "learning_rate": 4.289181692094313e-06, "loss": 0.0048, "step": 8245 }, { "epoch": 5.71250432975407, "grad_norm": 0.9417048692703247, "learning_rate": 4.288488210818308e-06, "loss": 0.0064, "step": 8246 }, { "epoch": 5.713197090405265, "grad_norm": 0.40976911783218384, "learning_rate": 4.287794729542303e-06, "loss": 0.0063, "step": 8247 }, { "epoch": 5.71388985105646, "grad_norm": 0.46163731813430786, "learning_rate": 4.287101248266297e-06, "loss": 0.0102, "step": 8248 }, { "epoch": 5.714582611707655, "grad_norm": 0.2681505084037781, "learning_rate": 4.286407766990291e-06, "loss": 0.0061, "step": 8249 }, { "epoch": 5.71527537235885, "grad_norm": 0.5496297478675842, "learning_rate": 4.2857142857142855e-06, "loss": 0.008, "step": 8250 }, { "epoch": 5.715968133010045, "grad_norm": 0.38196346163749695, "learning_rate": 4.28502080443828e-06, "loss": 0.0076, "step": 8251 }, { "epoch": 5.71666089366124, "grad_norm": 0.8958604335784912, "learning_rate": 4.284327323162275e-06, "loss": 0.0133, "step": 8252 }, { "epoch": 5.717353654312435, "grad_norm": 0.34037333726882935, "learning_rate": 4.283633841886269e-06, "loss": 0.0081, "step": 8253 }, { "epoch": 5.71804641496363, "grad_norm": 0.34312424063682556, "learning_rate": 4.2829403606102635e-06, "loss": 0.0053, "step": 8254 }, { "epoch": 5.718739175614825, "grad_norm": 0.3917454481124878, "learning_rate": 4.2822468793342584e-06, "loss": 0.0065, "step": 8255 }, { "epoch": 5.71943193626602, "grad_norm": 0.2833869457244873, "learning_rate": 4.281553398058253e-06, "loss": 0.0092, "step": 8256 }, { "epoch": 5.720124696917215, "grad_norm": 0.32059454917907715, "learning_rate": 4.2808599167822475e-06, "loss": 0.0058, "step": 8257 }, { "epoch": 5.72081745756841, "grad_norm": 0.38847750425338745, "learning_rate": 4.2801664355062415e-06, "loss": 0.0135, "step": 8258 }, { "epoch": 5.721510218219605, "grad_norm": 0.4522644877433777, "learning_rate": 4.279472954230236e-06, "loss": 0.009, "step": 8259 }, { "epoch": 5.7222029788708, "grad_norm": 0.41455429792404175, "learning_rate": 4.2787794729542306e-06, "loss": 0.0106, "step": 8260 }, { "epoch": 5.722895739521995, "grad_norm": 0.3959617614746094, "learning_rate": 4.2780859916782255e-06, "loss": 0.0077, "step": 8261 }, { "epoch": 5.72358850017319, "grad_norm": 0.44272294640541077, "learning_rate": 4.27739251040222e-06, "loss": 0.01, "step": 8262 }, { "epoch": 5.724281260824386, "grad_norm": 0.3831024765968323, "learning_rate": 4.276699029126214e-06, "loss": 0.0085, "step": 8263 }, { "epoch": 5.72497402147558, "grad_norm": 0.3204551935195923, "learning_rate": 4.276005547850209e-06, "loss": 0.0071, "step": 8264 }, { "epoch": 5.725666782126775, "grad_norm": 0.25566670298576355, "learning_rate": 4.275312066574203e-06, "loss": 0.0058, "step": 8265 }, { "epoch": 5.7263595427779705, "grad_norm": 0.44943755865097046, "learning_rate": 4.274618585298198e-06, "loss": 0.006, "step": 8266 }, { "epoch": 5.727052303429165, "grad_norm": 0.32006701827049255, "learning_rate": 4.273925104022192e-06, "loss": 0.0077, "step": 8267 }, { "epoch": 5.72774506408036, "grad_norm": 0.3889787197113037, "learning_rate": 4.273231622746186e-06, "loss": 0.0065, "step": 8268 }, { "epoch": 5.7284378247315555, "grad_norm": 0.34079816937446594, "learning_rate": 4.272538141470181e-06, "loss": 0.0079, "step": 8269 }, { "epoch": 5.72913058538275, "grad_norm": 0.45281341671943665, "learning_rate": 4.271844660194175e-06, "loss": 0.0096, "step": 8270 }, { "epoch": 5.729823346033945, "grad_norm": 0.7545556426048279, "learning_rate": 4.27115117891817e-06, "loss": 0.0089, "step": 8271 }, { "epoch": 5.73051610668514, "grad_norm": 0.36646124720573425, "learning_rate": 4.270457697642164e-06, "loss": 0.009, "step": 8272 }, { "epoch": 5.731208867336335, "grad_norm": 0.2675376236438751, "learning_rate": 4.269764216366159e-06, "loss": 0.0072, "step": 8273 }, { "epoch": 5.73190162798753, "grad_norm": 0.46366944909095764, "learning_rate": 4.269070735090153e-06, "loss": 0.011, "step": 8274 }, { "epoch": 5.732594388638725, "grad_norm": 0.3251652121543884, "learning_rate": 4.268377253814147e-06, "loss": 0.008, "step": 8275 }, { "epoch": 5.733287149289921, "grad_norm": 0.48200687766075134, "learning_rate": 4.267683772538142e-06, "loss": 0.0077, "step": 8276 }, { "epoch": 5.733979909941115, "grad_norm": 0.45379742980003357, "learning_rate": 4.266990291262136e-06, "loss": 0.0109, "step": 8277 }, { "epoch": 5.73467267059231, "grad_norm": 0.4099942445755005, "learning_rate": 4.266296809986131e-06, "loss": 0.0076, "step": 8278 }, { "epoch": 5.735365431243506, "grad_norm": 0.35289695858955383, "learning_rate": 4.265603328710125e-06, "loss": 0.0055, "step": 8279 }, { "epoch": 5.7360581918947, "grad_norm": 0.37739306688308716, "learning_rate": 4.264909847434119e-06, "loss": 0.0082, "step": 8280 }, { "epoch": 5.736750952545895, "grad_norm": 0.5464060306549072, "learning_rate": 4.264216366158114e-06, "loss": 0.0074, "step": 8281 }, { "epoch": 5.737443713197091, "grad_norm": 0.3523530960083008, "learning_rate": 4.263522884882109e-06, "loss": 0.0067, "step": 8282 }, { "epoch": 5.738136473848286, "grad_norm": 0.36967411637306213, "learning_rate": 4.262829403606103e-06, "loss": 0.0072, "step": 8283 }, { "epoch": 5.73882923449948, "grad_norm": 0.4134407341480255, "learning_rate": 4.262135922330097e-06, "loss": 0.0088, "step": 8284 }, { "epoch": 5.7395219951506755, "grad_norm": 0.3396783769130707, "learning_rate": 4.261442441054092e-06, "loss": 0.0086, "step": 8285 }, { "epoch": 5.740214755801871, "grad_norm": 0.48782989382743835, "learning_rate": 4.260748959778087e-06, "loss": 0.0088, "step": 8286 }, { "epoch": 5.740907516453065, "grad_norm": 0.5654097199440002, "learning_rate": 4.260055478502081e-06, "loss": 0.0072, "step": 8287 }, { "epoch": 5.7416002771042605, "grad_norm": 0.2882134020328522, "learning_rate": 4.259361997226075e-06, "loss": 0.0076, "step": 8288 }, { "epoch": 5.742293037755456, "grad_norm": 0.3056911528110504, "learning_rate": 4.258668515950069e-06, "loss": 0.0065, "step": 8289 }, { "epoch": 5.74298579840665, "grad_norm": 0.3661186695098877, "learning_rate": 4.257975034674064e-06, "loss": 0.0099, "step": 8290 }, { "epoch": 5.743678559057845, "grad_norm": 0.30900242924690247, "learning_rate": 4.257281553398059e-06, "loss": 0.0051, "step": 8291 }, { "epoch": 5.744371319709041, "grad_norm": 0.29524505138397217, "learning_rate": 4.256588072122053e-06, "loss": 0.0057, "step": 8292 }, { "epoch": 5.745064080360235, "grad_norm": 0.25422176718711853, "learning_rate": 4.255894590846047e-06, "loss": 0.0056, "step": 8293 }, { "epoch": 5.74575684101143, "grad_norm": 0.3724766969680786, "learning_rate": 4.255201109570042e-06, "loss": 0.008, "step": 8294 }, { "epoch": 5.746449601662626, "grad_norm": 0.40852609276771545, "learning_rate": 4.254507628294036e-06, "loss": 0.0064, "step": 8295 }, { "epoch": 5.747142362313821, "grad_norm": 0.38329029083251953, "learning_rate": 4.253814147018031e-06, "loss": 0.0088, "step": 8296 }, { "epoch": 5.747835122965015, "grad_norm": 0.4243938624858856, "learning_rate": 4.253120665742025e-06, "loss": 0.0077, "step": 8297 }, { "epoch": 5.748527883616211, "grad_norm": 0.5794067978858948, "learning_rate": 4.252427184466019e-06, "loss": 0.0081, "step": 8298 }, { "epoch": 5.749220644267406, "grad_norm": 0.38931483030319214, "learning_rate": 4.251733703190014e-06, "loss": 0.0084, "step": 8299 }, { "epoch": 5.7499134049186, "grad_norm": 0.383065789937973, "learning_rate": 4.251040221914008e-06, "loss": 0.0072, "step": 8300 }, { "epoch": 5.7506061655697955, "grad_norm": 0.30684277415275574, "learning_rate": 4.250346740638003e-06, "loss": 0.0085, "step": 8301 }, { "epoch": 5.751298926220991, "grad_norm": 0.5193754434585571, "learning_rate": 4.2496532593619974e-06, "loss": 0.0082, "step": 8302 }, { "epoch": 5.751991686872186, "grad_norm": 0.3613142669200897, "learning_rate": 4.248959778085992e-06, "loss": 0.0071, "step": 8303 }, { "epoch": 5.7526844475233805, "grad_norm": 0.5218485593795776, "learning_rate": 4.2482662968099865e-06, "loss": 0.0092, "step": 8304 }, { "epoch": 5.753377208174576, "grad_norm": 0.30625155568122864, "learning_rate": 4.2475728155339805e-06, "loss": 0.0104, "step": 8305 }, { "epoch": 5.754069968825771, "grad_norm": 0.3795376718044281, "learning_rate": 4.2468793342579755e-06, "loss": 0.0075, "step": 8306 }, { "epoch": 5.754762729476965, "grad_norm": 0.38593757152557373, "learning_rate": 4.2461858529819696e-06, "loss": 0.0074, "step": 8307 }, { "epoch": 5.755455490128161, "grad_norm": 0.49432623386383057, "learning_rate": 4.2454923717059645e-06, "loss": 0.0104, "step": 8308 }, { "epoch": 5.756148250779356, "grad_norm": 0.32483917474746704, "learning_rate": 4.244798890429959e-06, "loss": 0.0072, "step": 8309 }, { "epoch": 5.75684101143055, "grad_norm": 0.4211333990097046, "learning_rate": 4.244105409153953e-06, "loss": 0.0101, "step": 8310 }, { "epoch": 5.757533772081746, "grad_norm": 0.24202485382556915, "learning_rate": 4.243411927877948e-06, "loss": 0.0058, "step": 8311 }, { "epoch": 5.758226532732941, "grad_norm": 0.3968794047832489, "learning_rate": 4.2427184466019425e-06, "loss": 0.01, "step": 8312 }, { "epoch": 5.758919293384135, "grad_norm": 0.4180951416492462, "learning_rate": 4.242024965325937e-06, "loss": 0.0143, "step": 8313 }, { "epoch": 5.759612054035331, "grad_norm": 0.2683388292789459, "learning_rate": 4.241331484049931e-06, "loss": 0.0047, "step": 8314 }, { "epoch": 5.760304814686526, "grad_norm": 0.35669052600860596, "learning_rate": 4.240638002773926e-06, "loss": 0.0102, "step": 8315 }, { "epoch": 5.760997575337721, "grad_norm": 0.6181167364120483, "learning_rate": 4.23994452149792e-06, "loss": 0.0077, "step": 8316 }, { "epoch": 5.761690335988916, "grad_norm": 0.45315179228782654, "learning_rate": 4.239251040221915e-06, "loss": 0.0105, "step": 8317 }, { "epoch": 5.762383096640111, "grad_norm": 0.38807427883148193, "learning_rate": 4.238557558945909e-06, "loss": 0.0079, "step": 8318 }, { "epoch": 5.763075857291306, "grad_norm": 0.4046410024166107, "learning_rate": 4.237864077669903e-06, "loss": 0.0126, "step": 8319 }, { "epoch": 5.7637686179425005, "grad_norm": 0.42906343936920166, "learning_rate": 4.237170596393898e-06, "loss": 0.0086, "step": 8320 }, { "epoch": 5.764461378593696, "grad_norm": 0.40467703342437744, "learning_rate": 4.236477115117893e-06, "loss": 0.0095, "step": 8321 }, { "epoch": 5.765154139244891, "grad_norm": 0.42137759923934937, "learning_rate": 4.235783633841887e-06, "loss": 0.0089, "step": 8322 }, { "epoch": 5.765846899896086, "grad_norm": 0.3476032614707947, "learning_rate": 4.235090152565881e-06, "loss": 0.01, "step": 8323 }, { "epoch": 5.766539660547281, "grad_norm": 0.37140029668807983, "learning_rate": 4.234396671289876e-06, "loss": 0.0088, "step": 8324 }, { "epoch": 5.767232421198476, "grad_norm": 0.31196829676628113, "learning_rate": 4.23370319001387e-06, "loss": 0.007, "step": 8325 }, { "epoch": 5.767925181849671, "grad_norm": 0.4869900941848755, "learning_rate": 4.233009708737865e-06, "loss": 0.0085, "step": 8326 }, { "epoch": 5.768617942500866, "grad_norm": 0.5487848520278931, "learning_rate": 4.232316227461859e-06, "loss": 0.0096, "step": 8327 }, { "epoch": 5.769310703152061, "grad_norm": 0.3647545874118805, "learning_rate": 4.231622746185853e-06, "loss": 0.009, "step": 8328 }, { "epoch": 5.770003463803256, "grad_norm": 0.47813114523887634, "learning_rate": 4.230929264909848e-06, "loss": 0.0125, "step": 8329 }, { "epoch": 5.770696224454451, "grad_norm": 0.17446501553058624, "learning_rate": 4.230235783633842e-06, "loss": 0.0038, "step": 8330 }, { "epoch": 5.771388985105646, "grad_norm": 0.35993754863739014, "learning_rate": 4.229542302357837e-06, "loss": 0.0099, "step": 8331 }, { "epoch": 5.772081745756841, "grad_norm": 1.0426440238952637, "learning_rate": 4.228848821081831e-06, "loss": 0.0098, "step": 8332 }, { "epoch": 5.772774506408036, "grad_norm": 0.36029863357543945, "learning_rate": 4.228155339805826e-06, "loss": 0.0061, "step": 8333 }, { "epoch": 5.773467267059231, "grad_norm": 0.550159752368927, "learning_rate": 4.22746185852982e-06, "loss": 0.012, "step": 8334 }, { "epoch": 5.774160027710426, "grad_norm": 0.32115405797958374, "learning_rate": 4.226768377253814e-06, "loss": 0.0076, "step": 8335 }, { "epoch": 5.774852788361621, "grad_norm": 0.5311328768730164, "learning_rate": 4.226074895977809e-06, "loss": 0.0106, "step": 8336 }, { "epoch": 5.775545549012816, "grad_norm": 0.2964485287666321, "learning_rate": 4.225381414701803e-06, "loss": 0.0073, "step": 8337 }, { "epoch": 5.776238309664011, "grad_norm": 0.28040528297424316, "learning_rate": 4.224687933425798e-06, "loss": 0.0088, "step": 8338 }, { "epoch": 5.776931070315206, "grad_norm": 0.4162251949310303, "learning_rate": 4.223994452149792e-06, "loss": 0.0087, "step": 8339 }, { "epoch": 5.777623830966401, "grad_norm": 0.27482980489730835, "learning_rate": 4.223300970873786e-06, "loss": 0.0058, "step": 8340 }, { "epoch": 5.778316591617596, "grad_norm": 0.4677090048789978, "learning_rate": 4.222607489597781e-06, "loss": 0.0079, "step": 8341 }, { "epoch": 5.779009352268791, "grad_norm": 0.4626348912715912, "learning_rate": 4.221914008321776e-06, "loss": 0.0071, "step": 8342 }, { "epoch": 5.779702112919987, "grad_norm": 0.5672571063041687, "learning_rate": 4.22122052704577e-06, "loss": 0.0071, "step": 8343 }, { "epoch": 5.780394873571181, "grad_norm": 0.37879785895347595, "learning_rate": 4.220527045769764e-06, "loss": 0.0074, "step": 8344 }, { "epoch": 5.781087634222376, "grad_norm": 0.3782157599925995, "learning_rate": 4.219833564493758e-06, "loss": 0.007, "step": 8345 }, { "epoch": 5.7817803948735715, "grad_norm": 0.3514493703842163, "learning_rate": 4.219140083217753e-06, "loss": 0.0076, "step": 8346 }, { "epoch": 5.782473155524766, "grad_norm": 0.33222493529319763, "learning_rate": 4.218446601941748e-06, "loss": 0.0074, "step": 8347 }, { "epoch": 5.783165916175961, "grad_norm": 0.4822169244289398, "learning_rate": 4.217753120665742e-06, "loss": 0.0077, "step": 8348 }, { "epoch": 5.7838586768271565, "grad_norm": 0.31460967659950256, "learning_rate": 4.2170596393897364e-06, "loss": 0.0068, "step": 8349 }, { "epoch": 5.784551437478351, "grad_norm": 0.27153342962265015, "learning_rate": 4.216366158113731e-06, "loss": 0.0071, "step": 8350 }, { "epoch": 5.785244198129546, "grad_norm": 0.8966512680053711, "learning_rate": 4.215672676837726e-06, "loss": 0.0084, "step": 8351 }, { "epoch": 5.785936958780741, "grad_norm": 0.48668205738067627, "learning_rate": 4.21497919556172e-06, "loss": 0.0103, "step": 8352 }, { "epoch": 5.786629719431936, "grad_norm": 0.422843337059021, "learning_rate": 4.2142857142857145e-06, "loss": 0.0086, "step": 8353 }, { "epoch": 5.787322480083131, "grad_norm": 0.3376331031322479, "learning_rate": 4.213592233009709e-06, "loss": 0.0079, "step": 8354 }, { "epoch": 5.788015240734326, "grad_norm": 0.5531777739524841, "learning_rate": 4.2128987517337035e-06, "loss": 0.0111, "step": 8355 }, { "epoch": 5.788708001385522, "grad_norm": 0.5027947425842285, "learning_rate": 4.2122052704576984e-06, "loss": 0.0107, "step": 8356 }, { "epoch": 5.789400762036716, "grad_norm": 0.4449463486671448, "learning_rate": 4.2115117891816925e-06, "loss": 0.0092, "step": 8357 }, { "epoch": 5.790093522687911, "grad_norm": 0.3338002860546112, "learning_rate": 4.210818307905687e-06, "loss": 0.0092, "step": 8358 }, { "epoch": 5.790786283339107, "grad_norm": 0.8274686932563782, "learning_rate": 4.2101248266296815e-06, "loss": 0.0122, "step": 8359 }, { "epoch": 5.791479043990301, "grad_norm": 0.49187445640563965, "learning_rate": 4.209431345353676e-06, "loss": 0.008, "step": 8360 }, { "epoch": 5.792171804641496, "grad_norm": 0.3872314989566803, "learning_rate": 4.2087378640776706e-06, "loss": 0.0065, "step": 8361 }, { "epoch": 5.792864565292692, "grad_norm": 0.39518681168556213, "learning_rate": 4.208044382801665e-06, "loss": 0.0103, "step": 8362 }, { "epoch": 5.793557325943887, "grad_norm": 0.4513131082057953, "learning_rate": 4.2073509015256596e-06, "loss": 0.0077, "step": 8363 }, { "epoch": 5.794250086595081, "grad_norm": 0.4230335056781769, "learning_rate": 4.206657420249654e-06, "loss": 0.0069, "step": 8364 }, { "epoch": 5.7949428472462765, "grad_norm": 0.44150733947753906, "learning_rate": 4.205963938973648e-06, "loss": 0.007, "step": 8365 }, { "epoch": 5.795635607897472, "grad_norm": 0.3151010572910309, "learning_rate": 4.205270457697643e-06, "loss": 0.0067, "step": 8366 }, { "epoch": 5.796328368548666, "grad_norm": 0.31523779034614563, "learning_rate": 4.204576976421637e-06, "loss": 0.0086, "step": 8367 }, { "epoch": 5.7970211291998615, "grad_norm": 0.34788498282432556, "learning_rate": 4.203883495145632e-06, "loss": 0.0081, "step": 8368 }, { "epoch": 5.797713889851057, "grad_norm": 0.6555057764053345, "learning_rate": 4.203190013869626e-06, "loss": 0.0103, "step": 8369 }, { "epoch": 5.798406650502251, "grad_norm": 0.47564446926116943, "learning_rate": 4.20249653259362e-06, "loss": 0.0058, "step": 8370 }, { "epoch": 5.799099411153446, "grad_norm": 0.3363701105117798, "learning_rate": 4.201803051317615e-06, "loss": 0.0094, "step": 8371 }, { "epoch": 5.799792171804642, "grad_norm": 0.2784354090690613, "learning_rate": 4.20110957004161e-06, "loss": 0.0064, "step": 8372 }, { "epoch": 5.800484932455836, "grad_norm": 0.3875786364078522, "learning_rate": 4.200416088765604e-06, "loss": 0.0077, "step": 8373 }, { "epoch": 5.801177693107031, "grad_norm": 0.373428076505661, "learning_rate": 4.199722607489598e-06, "loss": 0.0093, "step": 8374 }, { "epoch": 5.801870453758227, "grad_norm": 0.41150420904159546, "learning_rate": 4.199029126213592e-06, "loss": 0.0109, "step": 8375 }, { "epoch": 5.802563214409422, "grad_norm": 0.3832527697086334, "learning_rate": 4.198335644937587e-06, "loss": 0.0081, "step": 8376 }, { "epoch": 5.803255975060616, "grad_norm": 0.3092479109764099, "learning_rate": 4.197642163661582e-06, "loss": 0.0061, "step": 8377 }, { "epoch": 5.803948735711812, "grad_norm": 0.6134652495384216, "learning_rate": 4.196948682385576e-06, "loss": 0.0085, "step": 8378 }, { "epoch": 5.804641496363007, "grad_norm": 0.3552073538303375, "learning_rate": 4.19625520110957e-06, "loss": 0.0101, "step": 8379 }, { "epoch": 5.805334257014201, "grad_norm": 0.3787483274936676, "learning_rate": 4.195561719833565e-06, "loss": 0.0064, "step": 8380 }, { "epoch": 5.8060270176653965, "grad_norm": 0.3213479518890381, "learning_rate": 4.19486823855756e-06, "loss": 0.009, "step": 8381 }, { "epoch": 5.806719778316592, "grad_norm": 0.5946488380432129, "learning_rate": 4.194174757281554e-06, "loss": 0.0085, "step": 8382 }, { "epoch": 5.807412538967787, "grad_norm": 0.4807218611240387, "learning_rate": 4.193481276005548e-06, "loss": 0.0102, "step": 8383 }, { "epoch": 5.8081052996189815, "grad_norm": 0.367820680141449, "learning_rate": 4.192787794729542e-06, "loss": 0.0088, "step": 8384 }, { "epoch": 5.808798060270177, "grad_norm": 0.4575831890106201, "learning_rate": 4.192094313453537e-06, "loss": 0.0127, "step": 8385 }, { "epoch": 5.809490820921372, "grad_norm": 0.3799903988838196, "learning_rate": 4.191400832177532e-06, "loss": 0.0084, "step": 8386 }, { "epoch": 5.810183581572566, "grad_norm": 0.3574175536632538, "learning_rate": 4.190707350901526e-06, "loss": 0.0059, "step": 8387 }, { "epoch": 5.810876342223762, "grad_norm": 0.44677236676216125, "learning_rate": 4.19001386962552e-06, "loss": 0.0107, "step": 8388 }, { "epoch": 5.811569102874957, "grad_norm": 0.40432292222976685, "learning_rate": 4.189320388349515e-06, "loss": 0.0089, "step": 8389 }, { "epoch": 5.812261863526151, "grad_norm": 0.34104475378990173, "learning_rate": 4.188626907073509e-06, "loss": 0.0075, "step": 8390 }, { "epoch": 5.812954624177347, "grad_norm": 0.4250873625278473, "learning_rate": 4.187933425797504e-06, "loss": 0.011, "step": 8391 }, { "epoch": 5.813647384828542, "grad_norm": 0.2522217333316803, "learning_rate": 4.187239944521498e-06, "loss": 0.0067, "step": 8392 }, { "epoch": 5.814340145479736, "grad_norm": 0.415157675743103, "learning_rate": 4.186546463245492e-06, "loss": 0.0062, "step": 8393 }, { "epoch": 5.815032906130932, "grad_norm": 0.6287251114845276, "learning_rate": 4.185852981969487e-06, "loss": 0.0104, "step": 8394 }, { "epoch": 5.815725666782127, "grad_norm": 0.3798479735851288, "learning_rate": 4.185159500693481e-06, "loss": 0.0073, "step": 8395 }, { "epoch": 5.816418427433322, "grad_norm": 0.34912198781967163, "learning_rate": 4.184466019417476e-06, "loss": 0.0072, "step": 8396 }, { "epoch": 5.8171111880845165, "grad_norm": 0.39099520444869995, "learning_rate": 4.18377253814147e-06, "loss": 0.0074, "step": 8397 }, { "epoch": 5.817803948735712, "grad_norm": 0.458638072013855, "learning_rate": 4.183079056865465e-06, "loss": 0.0076, "step": 8398 }, { "epoch": 5.818496709386907, "grad_norm": 0.3762831687927246, "learning_rate": 4.182385575589459e-06, "loss": 0.007, "step": 8399 }, { "epoch": 5.8191894700381015, "grad_norm": 0.381137877702713, "learning_rate": 4.1816920943134535e-06, "loss": 0.0099, "step": 8400 }, { "epoch": 5.819882230689297, "grad_norm": 0.4109715223312378, "learning_rate": 4.180998613037448e-06, "loss": 0.0091, "step": 8401 }, { "epoch": 5.820574991340492, "grad_norm": 0.4500003159046173, "learning_rate": 4.180305131761443e-06, "loss": 0.0117, "step": 8402 }, { "epoch": 5.821267751991687, "grad_norm": 0.49851664900779724, "learning_rate": 4.1796116504854374e-06, "loss": 0.0115, "step": 8403 }, { "epoch": 5.821960512642882, "grad_norm": 0.29713866114616394, "learning_rate": 4.1789181692094315e-06, "loss": 0.0063, "step": 8404 }, { "epoch": 5.822653273294077, "grad_norm": 0.34780338406562805, "learning_rate": 4.178224687933426e-06, "loss": 0.009, "step": 8405 }, { "epoch": 5.823346033945272, "grad_norm": 0.3469697833061218, "learning_rate": 4.1775312066574205e-06, "loss": 0.0081, "step": 8406 }, { "epoch": 5.824038794596467, "grad_norm": 0.27404332160949707, "learning_rate": 4.1768377253814155e-06, "loss": 0.0055, "step": 8407 }, { "epoch": 5.824731555247662, "grad_norm": 0.30559876561164856, "learning_rate": 4.1761442441054096e-06, "loss": 0.0069, "step": 8408 }, { "epoch": 5.825424315898857, "grad_norm": 0.38184332847595215, "learning_rate": 4.175450762829404e-06, "loss": 0.0088, "step": 8409 }, { "epoch": 5.826117076550052, "grad_norm": 0.24550074338912964, "learning_rate": 4.1747572815533986e-06, "loss": 0.0055, "step": 8410 }, { "epoch": 5.826809837201247, "grad_norm": 0.29619890451431274, "learning_rate": 4.1740638002773935e-06, "loss": 0.0061, "step": 8411 }, { "epoch": 5.827502597852442, "grad_norm": 0.45778506994247437, "learning_rate": 4.173370319001388e-06, "loss": 0.0074, "step": 8412 }, { "epoch": 5.828195358503637, "grad_norm": 0.3315967917442322, "learning_rate": 4.172676837725382e-06, "loss": 0.0074, "step": 8413 }, { "epoch": 5.828888119154832, "grad_norm": 0.31582432985305786, "learning_rate": 4.171983356449376e-06, "loss": 0.0058, "step": 8414 }, { "epoch": 5.829580879806027, "grad_norm": 0.3864191174507141, "learning_rate": 4.171289875173371e-06, "loss": 0.0085, "step": 8415 }, { "epoch": 5.830273640457222, "grad_norm": 0.34668925404548645, "learning_rate": 4.170596393897366e-06, "loss": 0.0097, "step": 8416 }, { "epoch": 5.830966401108417, "grad_norm": 0.4442277252674103, "learning_rate": 4.16990291262136e-06, "loss": 0.0094, "step": 8417 }, { "epoch": 5.831659161759612, "grad_norm": 0.23464812338352203, "learning_rate": 4.169209431345354e-06, "loss": 0.0051, "step": 8418 }, { "epoch": 5.832351922410807, "grad_norm": 0.26087555289268494, "learning_rate": 4.168515950069349e-06, "loss": 0.0071, "step": 8419 }, { "epoch": 5.833044683062002, "grad_norm": 0.3434668481349945, "learning_rate": 4.167822468793343e-06, "loss": 0.0071, "step": 8420 }, { "epoch": 5.833737443713197, "grad_norm": 0.4209461808204651, "learning_rate": 4.167128987517338e-06, "loss": 0.0077, "step": 8421 }, { "epoch": 5.834430204364392, "grad_norm": 0.417118102312088, "learning_rate": 4.166435506241332e-06, "loss": 0.011, "step": 8422 }, { "epoch": 5.835122965015588, "grad_norm": 0.406374454498291, "learning_rate": 4.165742024965326e-06, "loss": 0.0069, "step": 8423 }, { "epoch": 5.835815725666782, "grad_norm": 0.4506526589393616, "learning_rate": 4.165048543689321e-06, "loss": 0.0105, "step": 8424 }, { "epoch": 5.836508486317977, "grad_norm": 0.24989177286624908, "learning_rate": 4.164355062413315e-06, "loss": 0.0056, "step": 8425 }, { "epoch": 5.8372012469691725, "grad_norm": 0.45859843492507935, "learning_rate": 4.16366158113731e-06, "loss": 0.0087, "step": 8426 }, { "epoch": 5.837894007620367, "grad_norm": 0.3756405711174011, "learning_rate": 4.162968099861304e-06, "loss": 0.0068, "step": 8427 }, { "epoch": 5.838586768271562, "grad_norm": 0.34702837467193604, "learning_rate": 4.162274618585299e-06, "loss": 0.0058, "step": 8428 }, { "epoch": 5.8392795289227575, "grad_norm": 0.28181061148643494, "learning_rate": 4.161581137309293e-06, "loss": 0.006, "step": 8429 }, { "epoch": 5.839972289573952, "grad_norm": 0.2980850040912628, "learning_rate": 4.160887656033287e-06, "loss": 0.0083, "step": 8430 }, { "epoch": 5.840665050225147, "grad_norm": 0.408311665058136, "learning_rate": 4.160194174757282e-06, "loss": 0.0072, "step": 8431 }, { "epoch": 5.841357810876342, "grad_norm": 0.2777591943740845, "learning_rate": 4.159500693481276e-06, "loss": 0.0061, "step": 8432 }, { "epoch": 5.842050571527537, "grad_norm": 0.36505457758903503, "learning_rate": 4.158807212205271e-06, "loss": 0.0071, "step": 8433 }, { "epoch": 5.842743332178732, "grad_norm": 0.32132652401924133, "learning_rate": 4.158113730929265e-06, "loss": 0.0075, "step": 8434 }, { "epoch": 5.843436092829927, "grad_norm": 0.24043220281600952, "learning_rate": 4.157420249653259e-06, "loss": 0.0052, "step": 8435 }, { "epoch": 5.844128853481123, "grad_norm": 0.4256744682788849, "learning_rate": 4.156726768377254e-06, "loss": 0.0071, "step": 8436 }, { "epoch": 5.844821614132317, "grad_norm": 0.4054756760597229, "learning_rate": 4.156033287101249e-06, "loss": 0.0088, "step": 8437 }, { "epoch": 5.845514374783512, "grad_norm": 0.37333884835243225, "learning_rate": 4.155339805825243e-06, "loss": 0.0092, "step": 8438 }, { "epoch": 5.846207135434708, "grad_norm": 0.4519575834274292, "learning_rate": 4.154646324549237e-06, "loss": 0.01, "step": 8439 }, { "epoch": 5.846899896085902, "grad_norm": 0.362836092710495, "learning_rate": 4.153952843273232e-06, "loss": 0.0082, "step": 8440 }, { "epoch": 5.847592656737097, "grad_norm": 0.4783961772918701, "learning_rate": 4.153259361997226e-06, "loss": 0.0119, "step": 8441 }, { "epoch": 5.8482854173882926, "grad_norm": 0.5307455062866211, "learning_rate": 4.152565880721221e-06, "loss": 0.0095, "step": 8442 }, { "epoch": 5.848978178039488, "grad_norm": 0.3113182485103607, "learning_rate": 4.151872399445215e-06, "loss": 0.0078, "step": 8443 }, { "epoch": 5.849670938690682, "grad_norm": 0.41187363862991333, "learning_rate": 4.151178918169209e-06, "loss": 0.007, "step": 8444 }, { "epoch": 5.8503636993418775, "grad_norm": 0.3689092993736267, "learning_rate": 4.150485436893204e-06, "loss": 0.0092, "step": 8445 }, { "epoch": 5.851056459993073, "grad_norm": 0.20535829663276672, "learning_rate": 4.149791955617199e-06, "loss": 0.004, "step": 8446 }, { "epoch": 5.851749220644267, "grad_norm": 0.4785582721233368, "learning_rate": 4.149098474341193e-06, "loss": 0.0106, "step": 8447 }, { "epoch": 5.8524419812954624, "grad_norm": 0.5107148885726929, "learning_rate": 4.148404993065187e-06, "loss": 0.0078, "step": 8448 }, { "epoch": 5.853134741946658, "grad_norm": 0.4586354196071625, "learning_rate": 4.147711511789182e-06, "loss": 0.0082, "step": 8449 }, { "epoch": 5.853827502597852, "grad_norm": 0.21495041251182556, "learning_rate": 4.1470180305131764e-06, "loss": 0.0047, "step": 8450 }, { "epoch": 5.854520263249047, "grad_norm": 0.4510405957698822, "learning_rate": 4.146324549237171e-06, "loss": 0.0087, "step": 8451 }, { "epoch": 5.855213023900243, "grad_norm": 0.8113586902618408, "learning_rate": 4.1456310679611654e-06, "loss": 0.0131, "step": 8452 }, { "epoch": 5.855905784551437, "grad_norm": 0.41880154609680176, "learning_rate": 4.1449375866851595e-06, "loss": 0.0072, "step": 8453 }, { "epoch": 5.856598545202632, "grad_norm": 0.32437968254089355, "learning_rate": 4.1442441054091545e-06, "loss": 0.0075, "step": 8454 }, { "epoch": 5.857291305853828, "grad_norm": 0.27792876958847046, "learning_rate": 4.1435506241331486e-06, "loss": 0.0063, "step": 8455 }, { "epoch": 5.857984066505023, "grad_norm": 0.32808801531791687, "learning_rate": 4.1428571428571435e-06, "loss": 0.0078, "step": 8456 }, { "epoch": 5.858676827156217, "grad_norm": 0.34345778822898865, "learning_rate": 4.1421636615811376e-06, "loss": 0.0054, "step": 8457 }, { "epoch": 5.859369587807413, "grad_norm": 0.29215386509895325, "learning_rate": 4.1414701803051325e-06, "loss": 0.0059, "step": 8458 }, { "epoch": 5.860062348458608, "grad_norm": 0.4607574939727783, "learning_rate": 4.140776699029127e-06, "loss": 0.0086, "step": 8459 }, { "epoch": 5.860755109109802, "grad_norm": 0.3328019678592682, "learning_rate": 4.140083217753121e-06, "loss": 0.0085, "step": 8460 }, { "epoch": 5.8614478697609975, "grad_norm": 0.22415997087955475, "learning_rate": 4.139389736477116e-06, "loss": 0.0049, "step": 8461 }, { "epoch": 5.862140630412193, "grad_norm": 0.43069514632225037, "learning_rate": 4.13869625520111e-06, "loss": 0.0086, "step": 8462 }, { "epoch": 5.862833391063388, "grad_norm": 0.5360902547836304, "learning_rate": 4.138002773925105e-06, "loss": 0.0069, "step": 8463 }, { "epoch": 5.8635261517145825, "grad_norm": 0.4504351317882538, "learning_rate": 4.137309292649099e-06, "loss": 0.0083, "step": 8464 }, { "epoch": 5.864218912365778, "grad_norm": 0.4289409816265106, "learning_rate": 4.136615811373093e-06, "loss": 0.0066, "step": 8465 }, { "epoch": 5.864911673016973, "grad_norm": 0.2925226092338562, "learning_rate": 4.135922330097088e-06, "loss": 0.0064, "step": 8466 }, { "epoch": 5.865604433668167, "grad_norm": 0.4810274541378021, "learning_rate": 4.135228848821083e-06, "loss": 0.0073, "step": 8467 }, { "epoch": 5.866297194319363, "grad_norm": 0.44315850734710693, "learning_rate": 4.134535367545077e-06, "loss": 0.0098, "step": 8468 }, { "epoch": 5.866989954970558, "grad_norm": 0.4919215142726898, "learning_rate": 4.133841886269071e-06, "loss": 0.0092, "step": 8469 }, { "epoch": 5.867682715621752, "grad_norm": 0.33163192868232727, "learning_rate": 4.133148404993066e-06, "loss": 0.0087, "step": 8470 }, { "epoch": 5.868375476272948, "grad_norm": 0.34440740942955017, "learning_rate": 4.13245492371706e-06, "loss": 0.008, "step": 8471 }, { "epoch": 5.869068236924143, "grad_norm": 0.3269757032394409, "learning_rate": 4.131761442441055e-06, "loss": 0.0076, "step": 8472 }, { "epoch": 5.869760997575337, "grad_norm": 0.5475027561187744, "learning_rate": 4.131067961165049e-06, "loss": 0.0097, "step": 8473 }, { "epoch": 5.870453758226533, "grad_norm": 0.461850106716156, "learning_rate": 4.130374479889043e-06, "loss": 0.0095, "step": 8474 }, { "epoch": 5.871146518877728, "grad_norm": 0.3912562131881714, "learning_rate": 4.129680998613038e-06, "loss": 0.0085, "step": 8475 }, { "epoch": 5.871839279528922, "grad_norm": 0.38020938634872437, "learning_rate": 4.128987517337033e-06, "loss": 0.0092, "step": 8476 }, { "epoch": 5.8725320401801175, "grad_norm": 0.4405178129673004, "learning_rate": 4.128294036061027e-06, "loss": 0.0087, "step": 8477 }, { "epoch": 5.873224800831313, "grad_norm": 0.3280518054962158, "learning_rate": 4.127600554785021e-06, "loss": 0.0053, "step": 8478 }, { "epoch": 5.873917561482508, "grad_norm": 0.3536124527454376, "learning_rate": 4.126907073509016e-06, "loss": 0.0073, "step": 8479 }, { "epoch": 5.8746103221337025, "grad_norm": 0.38063618540763855, "learning_rate": 4.12621359223301e-06, "loss": 0.0056, "step": 8480 }, { "epoch": 5.875303082784898, "grad_norm": 0.35610726475715637, "learning_rate": 4.125520110957005e-06, "loss": 0.0072, "step": 8481 }, { "epoch": 5.875995843436093, "grad_norm": 0.3490633964538574, "learning_rate": 4.124826629680999e-06, "loss": 0.006, "step": 8482 }, { "epoch": 5.876688604087288, "grad_norm": 0.39876359701156616, "learning_rate": 4.124133148404993e-06, "loss": 0.007, "step": 8483 }, { "epoch": 5.877381364738483, "grad_norm": 0.3289647400379181, "learning_rate": 4.123439667128988e-06, "loss": 0.0059, "step": 8484 }, { "epoch": 5.878074125389678, "grad_norm": 0.4408869445323944, "learning_rate": 4.122746185852982e-06, "loss": 0.0073, "step": 8485 }, { "epoch": 5.878766886040873, "grad_norm": 0.43645215034484863, "learning_rate": 4.122052704576976e-06, "loss": 0.0114, "step": 8486 }, { "epoch": 5.879459646692068, "grad_norm": 0.48829320073127747, "learning_rate": 4.121359223300971e-06, "loss": 0.0079, "step": 8487 }, { "epoch": 5.880152407343263, "grad_norm": 0.5117582082748413, "learning_rate": 4.120665742024966e-06, "loss": 0.0124, "step": 8488 }, { "epoch": 5.880845167994458, "grad_norm": 0.31155481934547424, "learning_rate": 4.11997226074896e-06, "loss": 0.0072, "step": 8489 }, { "epoch": 5.881537928645653, "grad_norm": 0.4325661063194275, "learning_rate": 4.119278779472954e-06, "loss": 0.0069, "step": 8490 }, { "epoch": 5.882230689296848, "grad_norm": 0.2997148334980011, "learning_rate": 4.118585298196948e-06, "loss": 0.0062, "step": 8491 }, { "epoch": 5.882923449948043, "grad_norm": 0.25914114713668823, "learning_rate": 4.117891816920943e-06, "loss": 0.0043, "step": 8492 }, { "epoch": 5.883616210599238, "grad_norm": 0.30932387709617615, "learning_rate": 4.117198335644938e-06, "loss": 0.0066, "step": 8493 }, { "epoch": 5.884308971250433, "grad_norm": 0.3908243179321289, "learning_rate": 4.116504854368932e-06, "loss": 0.0062, "step": 8494 }, { "epoch": 5.885001731901628, "grad_norm": 0.28193148970603943, "learning_rate": 4.115811373092926e-06, "loss": 0.0068, "step": 8495 }, { "epoch": 5.8856944925528225, "grad_norm": 0.3283005952835083, "learning_rate": 4.115117891816921e-06, "loss": 0.0056, "step": 8496 }, { "epoch": 5.886387253204018, "grad_norm": 0.5039186477661133, "learning_rate": 4.114424410540916e-06, "loss": 0.0118, "step": 8497 }, { "epoch": 5.887080013855213, "grad_norm": 0.4128674268722534, "learning_rate": 4.11373092926491e-06, "loss": 0.0095, "step": 8498 }, { "epoch": 5.887772774506408, "grad_norm": 0.2646946609020233, "learning_rate": 4.1130374479889044e-06, "loss": 0.0054, "step": 8499 }, { "epoch": 5.888465535157603, "grad_norm": 0.5354933142662048, "learning_rate": 4.1123439667128985e-06, "loss": 0.0107, "step": 8500 }, { "epoch": 5.889158295808798, "grad_norm": 0.41395577788352966, "learning_rate": 4.1116504854368935e-06, "loss": 0.0084, "step": 8501 }, { "epoch": 5.889851056459993, "grad_norm": 0.43025678396224976, "learning_rate": 4.110957004160888e-06, "loss": 0.0112, "step": 8502 }, { "epoch": 5.890543817111189, "grad_norm": 0.2974132299423218, "learning_rate": 4.1102635228848825e-06, "loss": 0.0077, "step": 8503 }, { "epoch": 5.891236577762383, "grad_norm": 0.41498416662216187, "learning_rate": 4.1095700416088766e-06, "loss": 0.0085, "step": 8504 }, { "epoch": 5.891929338413578, "grad_norm": 0.31983187794685364, "learning_rate": 4.1088765603328715e-06, "loss": 0.0087, "step": 8505 }, { "epoch": 5.8926220990647735, "grad_norm": 0.44971662759780884, "learning_rate": 4.108183079056866e-06, "loss": 0.0086, "step": 8506 }, { "epoch": 5.893314859715968, "grad_norm": 0.5393744111061096, "learning_rate": 4.1074895977808605e-06, "loss": 0.0096, "step": 8507 }, { "epoch": 5.894007620367163, "grad_norm": 0.42491066455841064, "learning_rate": 4.106796116504855e-06, "loss": 0.0075, "step": 8508 }, { "epoch": 5.8947003810183585, "grad_norm": 0.44292858242988586, "learning_rate": 4.106102635228849e-06, "loss": 0.0092, "step": 8509 }, { "epoch": 5.895393141669553, "grad_norm": 0.4139257073402405, "learning_rate": 4.105409153952844e-06, "loss": 0.0076, "step": 8510 }, { "epoch": 5.896085902320748, "grad_norm": 0.40488094091415405, "learning_rate": 4.104715672676838e-06, "loss": 0.009, "step": 8511 }, { "epoch": 5.896778662971943, "grad_norm": 0.46385952830314636, "learning_rate": 4.104022191400833e-06, "loss": 0.0096, "step": 8512 }, { "epoch": 5.897471423623138, "grad_norm": 0.2889416515827179, "learning_rate": 4.103328710124827e-06, "loss": 0.0061, "step": 8513 }, { "epoch": 5.898164184274333, "grad_norm": 0.29356223344802856, "learning_rate": 4.102635228848822e-06, "loss": 0.0054, "step": 8514 }, { "epoch": 5.898856944925528, "grad_norm": 0.46368181705474854, "learning_rate": 4.101941747572816e-06, "loss": 0.0076, "step": 8515 }, { "epoch": 5.899549705576723, "grad_norm": 0.3494319021701813, "learning_rate": 4.10124826629681e-06, "loss": 0.0071, "step": 8516 }, { "epoch": 5.900242466227918, "grad_norm": 0.3383325934410095, "learning_rate": 4.100554785020805e-06, "loss": 0.0062, "step": 8517 }, { "epoch": 5.900935226879113, "grad_norm": 0.37720346450805664, "learning_rate": 4.0998613037448e-06, "loss": 0.0068, "step": 8518 }, { "epoch": 5.901627987530309, "grad_norm": 0.33118075132369995, "learning_rate": 4.099167822468794e-06, "loss": 0.0069, "step": 8519 }, { "epoch": 5.902320748181503, "grad_norm": 0.39278867840766907, "learning_rate": 4.098474341192788e-06, "loss": 0.0083, "step": 8520 }, { "epoch": 5.903013508832698, "grad_norm": 0.3461000323295593, "learning_rate": 4.097780859916782e-06, "loss": 0.0078, "step": 8521 }, { "epoch": 5.9037062694838935, "grad_norm": 0.35026121139526367, "learning_rate": 4.097087378640777e-06, "loss": 0.0101, "step": 8522 }, { "epoch": 5.904399030135089, "grad_norm": 0.4571658968925476, "learning_rate": 4.096393897364772e-06, "loss": 0.0094, "step": 8523 }, { "epoch": 5.905091790786283, "grad_norm": 0.4728372097015381, "learning_rate": 4.095700416088766e-06, "loss": 0.0081, "step": 8524 }, { "epoch": 5.9057845514374785, "grad_norm": 0.35921093821525574, "learning_rate": 4.09500693481276e-06, "loss": 0.0075, "step": 8525 }, { "epoch": 5.906477312088674, "grad_norm": 0.2806636095046997, "learning_rate": 4.094313453536755e-06, "loss": 0.0057, "step": 8526 }, { "epoch": 5.907170072739868, "grad_norm": 0.37967145442962646, "learning_rate": 4.09361997226075e-06, "loss": 0.0082, "step": 8527 }, { "epoch": 5.907862833391063, "grad_norm": 0.30859291553497314, "learning_rate": 4.092926490984744e-06, "loss": 0.007, "step": 8528 }, { "epoch": 5.908555594042259, "grad_norm": 0.4443950057029724, "learning_rate": 4.092233009708738e-06, "loss": 0.0059, "step": 8529 }, { "epoch": 5.909248354693453, "grad_norm": 0.20901168882846832, "learning_rate": 4.091539528432732e-06, "loss": 0.006, "step": 8530 }, { "epoch": 5.909941115344648, "grad_norm": 0.2895027995109558, "learning_rate": 4.090846047156727e-06, "loss": 0.0053, "step": 8531 }, { "epoch": 5.910633875995844, "grad_norm": 0.41502273082733154, "learning_rate": 4.090152565880722e-06, "loss": 0.0098, "step": 8532 }, { "epoch": 5.911326636647038, "grad_norm": 0.35625556111335754, "learning_rate": 4.089459084604716e-06, "loss": 0.0079, "step": 8533 }, { "epoch": 5.912019397298233, "grad_norm": 0.714909017086029, "learning_rate": 4.08876560332871e-06, "loss": 0.0085, "step": 8534 }, { "epoch": 5.912712157949429, "grad_norm": 0.39728614687919617, "learning_rate": 4.088072122052705e-06, "loss": 0.0089, "step": 8535 }, { "epoch": 5.913404918600623, "grad_norm": 0.45335593819618225, "learning_rate": 4.087378640776699e-06, "loss": 0.0103, "step": 8536 }, { "epoch": 5.914097679251818, "grad_norm": 0.3070891797542572, "learning_rate": 4.086685159500694e-06, "loss": 0.0067, "step": 8537 }, { "epoch": 5.914790439903014, "grad_norm": 0.5755982995033264, "learning_rate": 4.085991678224688e-06, "loss": 0.0096, "step": 8538 }, { "epoch": 5.915483200554209, "grad_norm": 0.4692467451095581, "learning_rate": 4.085298196948682e-06, "loss": 0.0122, "step": 8539 }, { "epoch": 5.916175961205403, "grad_norm": 0.48478642106056213, "learning_rate": 4.084604715672677e-06, "loss": 0.0109, "step": 8540 }, { "epoch": 5.9168687218565985, "grad_norm": 0.2915709316730499, "learning_rate": 4.083911234396671e-06, "loss": 0.0092, "step": 8541 }, { "epoch": 5.917561482507794, "grad_norm": 0.3141952157020569, "learning_rate": 4.083217753120666e-06, "loss": 0.0057, "step": 8542 }, { "epoch": 5.918254243158988, "grad_norm": 0.3410860598087311, "learning_rate": 4.08252427184466e-06, "loss": 0.0056, "step": 8543 }, { "epoch": 5.9189470038101835, "grad_norm": 0.3486231863498688, "learning_rate": 4.081830790568655e-06, "loss": 0.0092, "step": 8544 }, { "epoch": 5.919639764461379, "grad_norm": 0.2818567454814911, "learning_rate": 4.081137309292649e-06, "loss": 0.0068, "step": 8545 }, { "epoch": 5.920332525112574, "grad_norm": 0.4283974766731262, "learning_rate": 4.0804438280166434e-06, "loss": 0.0129, "step": 8546 }, { "epoch": 5.921025285763768, "grad_norm": 0.44637128710746765, "learning_rate": 4.079750346740638e-06, "loss": 0.0062, "step": 8547 }, { "epoch": 5.921718046414964, "grad_norm": 0.34497445821762085, "learning_rate": 4.0790568654646325e-06, "loss": 0.0076, "step": 8548 }, { "epoch": 5.922410807066159, "grad_norm": 0.2993614971637726, "learning_rate": 4.078363384188627e-06, "loss": 0.0064, "step": 8549 }, { "epoch": 5.923103567717353, "grad_norm": 0.2649892568588257, "learning_rate": 4.0776699029126215e-06, "loss": 0.0055, "step": 8550 }, { "epoch": 5.923796328368549, "grad_norm": 0.47700437903404236, "learning_rate": 4.0769764216366156e-06, "loss": 0.0097, "step": 8551 }, { "epoch": 5.924489089019744, "grad_norm": 0.539458155632019, "learning_rate": 4.0762829403606105e-06, "loss": 0.0078, "step": 8552 }, { "epoch": 5.925181849670938, "grad_norm": 0.34400475025177, "learning_rate": 4.0755894590846054e-06, "loss": 0.0071, "step": 8553 }, { "epoch": 5.925874610322134, "grad_norm": 0.518405556678772, "learning_rate": 4.0748959778085995e-06, "loss": 0.0073, "step": 8554 }, { "epoch": 5.926567370973329, "grad_norm": 0.397615522146225, "learning_rate": 4.074202496532594e-06, "loss": 0.008, "step": 8555 }, { "epoch": 5.927260131624523, "grad_norm": 0.3987257480621338, "learning_rate": 4.0735090152565885e-06, "loss": 0.0085, "step": 8556 }, { "epoch": 5.9279528922757185, "grad_norm": 0.5365645289421082, "learning_rate": 4.072815533980583e-06, "loss": 0.0071, "step": 8557 }, { "epoch": 5.928645652926914, "grad_norm": 0.2973618805408478, "learning_rate": 4.0721220527045776e-06, "loss": 0.0054, "step": 8558 }, { "epoch": 5.929338413578109, "grad_norm": 0.4930473566055298, "learning_rate": 4.071428571428572e-06, "loss": 0.0103, "step": 8559 }, { "epoch": 5.9300311742293035, "grad_norm": 0.3181535303592682, "learning_rate": 4.070735090152566e-06, "loss": 0.0066, "step": 8560 }, { "epoch": 5.930723934880499, "grad_norm": 0.2664131224155426, "learning_rate": 4.070041608876561e-06, "loss": 0.0053, "step": 8561 }, { "epoch": 5.931416695531694, "grad_norm": 0.40227028727531433, "learning_rate": 4.069348127600556e-06, "loss": 0.0057, "step": 8562 }, { "epoch": 5.932109456182888, "grad_norm": 0.6118664145469666, "learning_rate": 4.06865464632455e-06, "loss": 0.0067, "step": 8563 }, { "epoch": 5.932802216834084, "grad_norm": 0.4364956021308899, "learning_rate": 4.067961165048544e-06, "loss": 0.0095, "step": 8564 }, { "epoch": 5.933494977485279, "grad_norm": 0.5452849864959717, "learning_rate": 4.067267683772539e-06, "loss": 0.0077, "step": 8565 }, { "epoch": 5.934187738136474, "grad_norm": 0.33377793431282043, "learning_rate": 4.066574202496533e-06, "loss": 0.0072, "step": 8566 }, { "epoch": 5.934880498787669, "grad_norm": 0.2791842520236969, "learning_rate": 4.065880721220528e-06, "loss": 0.0067, "step": 8567 }, { "epoch": 5.935573259438864, "grad_norm": 0.3220636546611786, "learning_rate": 4.065187239944522e-06, "loss": 0.0069, "step": 8568 }, { "epoch": 5.936266020090059, "grad_norm": 0.44998860359191895, "learning_rate": 4.064493758668516e-06, "loss": 0.008, "step": 8569 }, { "epoch": 5.936958780741254, "grad_norm": 0.26550137996673584, "learning_rate": 4.063800277392511e-06, "loss": 0.005, "step": 8570 }, { "epoch": 5.937651541392449, "grad_norm": 0.4603528380393982, "learning_rate": 4.063106796116505e-06, "loss": 0.0091, "step": 8571 }, { "epoch": 5.938344302043644, "grad_norm": 0.37784427404403687, "learning_rate": 4.0624133148405e-06, "loss": 0.0082, "step": 8572 }, { "epoch": 5.9390370626948386, "grad_norm": 0.481948584318161, "learning_rate": 4.061719833564494e-06, "loss": 0.0103, "step": 8573 }, { "epoch": 5.939729823346034, "grad_norm": 0.30039507150650024, "learning_rate": 4.061026352288489e-06, "loss": 0.0057, "step": 8574 }, { "epoch": 5.940422583997229, "grad_norm": 0.29231733083724976, "learning_rate": 4.060332871012483e-06, "loss": 0.0048, "step": 8575 }, { "epoch": 5.9411153446484235, "grad_norm": 0.39617523550987244, "learning_rate": 4.059639389736477e-06, "loss": 0.0104, "step": 8576 }, { "epoch": 5.941808105299619, "grad_norm": 0.3052685558795929, "learning_rate": 4.058945908460472e-06, "loss": 0.0083, "step": 8577 }, { "epoch": 5.942500865950814, "grad_norm": 0.2841261625289917, "learning_rate": 4.058252427184466e-06, "loss": 0.0056, "step": 8578 }, { "epoch": 5.943193626602009, "grad_norm": 0.3798808157444, "learning_rate": 4.057558945908461e-06, "loss": 0.0075, "step": 8579 }, { "epoch": 5.943886387253204, "grad_norm": 0.3799256384372711, "learning_rate": 4.056865464632455e-06, "loss": 0.0099, "step": 8580 }, { "epoch": 5.944579147904399, "grad_norm": 0.45977383852005005, "learning_rate": 4.056171983356449e-06, "loss": 0.008, "step": 8581 }, { "epoch": 5.945271908555594, "grad_norm": 0.5314000248908997, "learning_rate": 4.055478502080444e-06, "loss": 0.0101, "step": 8582 }, { "epoch": 5.945964669206789, "grad_norm": 0.4125184416770935, "learning_rate": 4.054785020804439e-06, "loss": 0.0082, "step": 8583 }, { "epoch": 5.946657429857984, "grad_norm": 0.3740287721157074, "learning_rate": 4.054091539528433e-06, "loss": 0.0062, "step": 8584 }, { "epoch": 5.947350190509179, "grad_norm": 0.30348846316337585, "learning_rate": 4.053398058252427e-06, "loss": 0.0069, "step": 8585 }, { "epoch": 5.9480429511603745, "grad_norm": 0.43956172466278076, "learning_rate": 4.052704576976421e-06, "loss": 0.0123, "step": 8586 }, { "epoch": 5.948735711811569, "grad_norm": 0.4679591655731201, "learning_rate": 4.052011095700416e-06, "loss": 0.0091, "step": 8587 }, { "epoch": 5.949428472462764, "grad_norm": 0.5678568482398987, "learning_rate": 4.051317614424411e-06, "loss": 0.0128, "step": 8588 }, { "epoch": 5.9501212331139595, "grad_norm": 0.37824150919914246, "learning_rate": 4.050624133148405e-06, "loss": 0.0068, "step": 8589 }, { "epoch": 5.950813993765154, "grad_norm": 0.38412854075431824, "learning_rate": 4.049930651872399e-06, "loss": 0.0073, "step": 8590 }, { "epoch": 5.951506754416349, "grad_norm": 0.40824028849601746, "learning_rate": 4.049237170596394e-06, "loss": 0.0084, "step": 8591 }, { "epoch": 5.952199515067544, "grad_norm": 0.6860206723213196, "learning_rate": 4.048543689320389e-06, "loss": 0.0077, "step": 8592 }, { "epoch": 5.952892275718739, "grad_norm": 0.28051894903182983, "learning_rate": 4.047850208044383e-06, "loss": 0.0074, "step": 8593 }, { "epoch": 5.953585036369934, "grad_norm": 0.32923415303230286, "learning_rate": 4.047156726768377e-06, "loss": 0.0077, "step": 8594 }, { "epoch": 5.954277797021129, "grad_norm": 0.2752898931503296, "learning_rate": 4.046463245492372e-06, "loss": 0.0059, "step": 8595 }, { "epoch": 5.954970557672324, "grad_norm": 0.4531700313091278, "learning_rate": 4.045769764216366e-06, "loss": 0.0086, "step": 8596 }, { "epoch": 5.955663318323519, "grad_norm": 0.3138566315174103, "learning_rate": 4.045076282940361e-06, "loss": 0.0053, "step": 8597 }, { "epoch": 5.956356078974714, "grad_norm": 0.2832315266132355, "learning_rate": 4.044382801664355e-06, "loss": 0.0059, "step": 8598 }, { "epoch": 5.95704883962591, "grad_norm": 0.33605942130088806, "learning_rate": 4.0436893203883495e-06, "loss": 0.0084, "step": 8599 }, { "epoch": 5.957741600277104, "grad_norm": 0.44466447830200195, "learning_rate": 4.0429958391123444e-06, "loss": 0.0078, "step": 8600 }, { "epoch": 5.958434360928299, "grad_norm": 0.3474466800689697, "learning_rate": 4.0423023578363385e-06, "loss": 0.0065, "step": 8601 }, { "epoch": 5.9591271215794945, "grad_norm": 0.36882033944129944, "learning_rate": 4.0416088765603335e-06, "loss": 0.0057, "step": 8602 }, { "epoch": 5.959819882230689, "grad_norm": 0.31977614760398865, "learning_rate": 4.0409153952843275e-06, "loss": 0.0064, "step": 8603 }, { "epoch": 5.960512642881884, "grad_norm": 0.33210012316703796, "learning_rate": 4.0402219140083225e-06, "loss": 0.009, "step": 8604 }, { "epoch": 5.9612054035330795, "grad_norm": 0.32554617524147034, "learning_rate": 4.0395284327323166e-06, "loss": 0.0061, "step": 8605 }, { "epoch": 5.961898164184275, "grad_norm": 0.32885587215423584, "learning_rate": 4.038834951456311e-06, "loss": 0.0062, "step": 8606 }, { "epoch": 5.962590924835469, "grad_norm": 0.2488987147808075, "learning_rate": 4.038141470180306e-06, "loss": 0.0048, "step": 8607 }, { "epoch": 5.963283685486664, "grad_norm": 0.2795836925506592, "learning_rate": 4.0374479889043e-06, "loss": 0.0061, "step": 8608 }, { "epoch": 5.96397644613786, "grad_norm": 0.47058913111686707, "learning_rate": 4.036754507628295e-06, "loss": 0.0077, "step": 8609 }, { "epoch": 5.964669206789054, "grad_norm": 0.5544996857643127, "learning_rate": 4.036061026352289e-06, "loss": 0.0074, "step": 8610 }, { "epoch": 5.965361967440249, "grad_norm": 0.3298545479774475, "learning_rate": 4.035367545076283e-06, "loss": 0.0069, "step": 8611 }, { "epoch": 5.966054728091445, "grad_norm": 0.3296002745628357, "learning_rate": 4.034674063800278e-06, "loss": 0.0099, "step": 8612 }, { "epoch": 5.966747488742639, "grad_norm": 0.4145912826061249, "learning_rate": 4.033980582524273e-06, "loss": 0.0099, "step": 8613 }, { "epoch": 5.967440249393834, "grad_norm": 0.257587194442749, "learning_rate": 4.033287101248267e-06, "loss": 0.0058, "step": 8614 }, { "epoch": 5.96813301004503, "grad_norm": 0.22960102558135986, "learning_rate": 4.032593619972261e-06, "loss": 0.0046, "step": 8615 }, { "epoch": 5.968825770696224, "grad_norm": 0.2833264172077179, "learning_rate": 4.031900138696255e-06, "loss": 0.0057, "step": 8616 }, { "epoch": 5.969518531347419, "grad_norm": 0.33171358704566956, "learning_rate": 4.03120665742025e-06, "loss": 0.0058, "step": 8617 }, { "epoch": 5.9702112919986146, "grad_norm": 0.3521496057510376, "learning_rate": 4.030513176144245e-06, "loss": 0.0072, "step": 8618 }, { "epoch": 5.97090405264981, "grad_norm": 0.41624125838279724, "learning_rate": 4.029819694868239e-06, "loss": 0.0071, "step": 8619 }, { "epoch": 5.971596813301004, "grad_norm": 0.25357767939567566, "learning_rate": 4.029126213592233e-06, "loss": 0.0052, "step": 8620 }, { "epoch": 5.9722895739521995, "grad_norm": 0.3554152250289917, "learning_rate": 4.028432732316228e-06, "loss": 0.0063, "step": 8621 }, { "epoch": 5.972982334603395, "grad_norm": 0.48351994156837463, "learning_rate": 4.027739251040223e-06, "loss": 0.0073, "step": 8622 }, { "epoch": 5.973675095254589, "grad_norm": 0.3916809558868408, "learning_rate": 4.027045769764217e-06, "loss": 0.0082, "step": 8623 }, { "epoch": 5.9743678559057845, "grad_norm": 0.4536936283111572, "learning_rate": 4.026352288488211e-06, "loss": 0.0085, "step": 8624 }, { "epoch": 5.97506061655698, "grad_norm": 0.3828856348991394, "learning_rate": 4.025658807212205e-06, "loss": 0.0078, "step": 8625 }, { "epoch": 5.975753377208175, "grad_norm": 0.2626187205314636, "learning_rate": 4.0249653259362e-06, "loss": 0.0046, "step": 8626 }, { "epoch": 5.976446137859369, "grad_norm": 0.40936556458473206, "learning_rate": 4.024271844660195e-06, "loss": 0.0065, "step": 8627 }, { "epoch": 5.977138898510565, "grad_norm": 0.991696298122406, "learning_rate": 4.023578363384189e-06, "loss": 0.0096, "step": 8628 }, { "epoch": 5.97783165916176, "grad_norm": 0.36345651745796204, "learning_rate": 4.022884882108183e-06, "loss": 0.0065, "step": 8629 }, { "epoch": 5.978524419812954, "grad_norm": 0.43531450629234314, "learning_rate": 4.022191400832178e-06, "loss": 0.0073, "step": 8630 }, { "epoch": 5.97921718046415, "grad_norm": 0.36994925141334534, "learning_rate": 4.021497919556172e-06, "loss": 0.0081, "step": 8631 }, { "epoch": 5.979909941115345, "grad_norm": 0.27669623494148254, "learning_rate": 4.020804438280167e-06, "loss": 0.0061, "step": 8632 }, { "epoch": 5.980602701766539, "grad_norm": 0.7132675051689148, "learning_rate": 4.020110957004161e-06, "loss": 0.0104, "step": 8633 }, { "epoch": 5.981295462417735, "grad_norm": 0.3345467746257782, "learning_rate": 4.019417475728156e-06, "loss": 0.0081, "step": 8634 }, { "epoch": 5.98198822306893, "grad_norm": 0.3310278654098511, "learning_rate": 4.01872399445215e-06, "loss": 0.0061, "step": 8635 }, { "epoch": 5.982680983720124, "grad_norm": 0.48464733362197876, "learning_rate": 4.018030513176144e-06, "loss": 0.0054, "step": 8636 }, { "epoch": 5.9833737443713195, "grad_norm": 0.7874982953071594, "learning_rate": 4.017337031900139e-06, "loss": 0.0089, "step": 8637 }, { "epoch": 5.984066505022515, "grad_norm": 0.4229066073894501, "learning_rate": 4.016643550624133e-06, "loss": 0.0082, "step": 8638 }, { "epoch": 5.98475926567371, "grad_norm": 0.43128204345703125, "learning_rate": 4.015950069348128e-06, "loss": 0.0106, "step": 8639 }, { "epoch": 5.9854520263249045, "grad_norm": 0.44995054602622986, "learning_rate": 4.015256588072122e-06, "loss": 0.0071, "step": 8640 }, { "epoch": 5.9861447869761, "grad_norm": 0.32228732109069824, "learning_rate": 4.014563106796116e-06, "loss": 0.005, "step": 8641 }, { "epoch": 5.986837547627295, "grad_norm": 0.514509916305542, "learning_rate": 4.013869625520111e-06, "loss": 0.0068, "step": 8642 }, { "epoch": 5.987530308278489, "grad_norm": 0.4599981904029846, "learning_rate": 4.013176144244106e-06, "loss": 0.01, "step": 8643 }, { "epoch": 5.988223068929685, "grad_norm": 0.3862370550632477, "learning_rate": 4.0124826629681e-06, "loss": 0.0074, "step": 8644 }, { "epoch": 5.98891582958088, "grad_norm": 0.4147402048110962, "learning_rate": 4.011789181692094e-06, "loss": 0.0089, "step": 8645 }, { "epoch": 5.989608590232075, "grad_norm": 0.34358009696006775, "learning_rate": 4.0110957004160885e-06, "loss": 0.0071, "step": 8646 }, { "epoch": 5.99030135088327, "grad_norm": 0.33831122517585754, "learning_rate": 4.0104022191400834e-06, "loss": 0.0078, "step": 8647 }, { "epoch": 5.990994111534465, "grad_norm": 0.2806399464607239, "learning_rate": 4.009708737864078e-06, "loss": 0.0049, "step": 8648 }, { "epoch": 5.99168687218566, "grad_norm": 0.5958036780357361, "learning_rate": 4.0090152565880725e-06, "loss": 0.012, "step": 8649 }, { "epoch": 5.992379632836855, "grad_norm": 0.42996886372566223, "learning_rate": 4.0083217753120665e-06, "loss": 0.0085, "step": 8650 }, { "epoch": 5.99307239348805, "grad_norm": 0.29199519753456116, "learning_rate": 4.0076282940360615e-06, "loss": 0.0066, "step": 8651 }, { "epoch": 5.993765154139245, "grad_norm": 0.5027766823768616, "learning_rate": 4.006934812760056e-06, "loss": 0.0141, "step": 8652 }, { "epoch": 5.9944579147904395, "grad_norm": 0.3328966200351715, "learning_rate": 4.0062413314840505e-06, "loss": 0.0071, "step": 8653 }, { "epoch": 5.995150675441635, "grad_norm": 0.5493451952934265, "learning_rate": 4.005547850208045e-06, "loss": 0.0088, "step": 8654 }, { "epoch": 5.99584343609283, "grad_norm": 0.4130370318889618, "learning_rate": 4.004854368932039e-06, "loss": 0.0072, "step": 8655 }, { "epoch": 5.9965361967440245, "grad_norm": 0.30309581756591797, "learning_rate": 4.004160887656034e-06, "loss": 0.009, "step": 8656 }, { "epoch": 5.99722895739522, "grad_norm": 0.431990385055542, "learning_rate": 4.0034674063800285e-06, "loss": 0.0098, "step": 8657 }, { "epoch": 5.997921718046415, "grad_norm": 0.35093095898628235, "learning_rate": 4.002773925104023e-06, "loss": 0.0079, "step": 8658 }, { "epoch": 5.99861447869761, "grad_norm": 0.4860779345035553, "learning_rate": 4.002080443828017e-06, "loss": 0.0087, "step": 8659 }, { "epoch": 5.999307239348805, "grad_norm": 0.31936201453208923, "learning_rate": 4.001386962552012e-06, "loss": 0.0057, "step": 8660 }, { "epoch": 6.0, "grad_norm": 0.3616134226322174, "learning_rate": 4.000693481276006e-06, "loss": 0.0079, "step": 8661 }, { "epoch": 6.0, "eval_loss": 0.28647318482398987, "eval_runtime": 7657.2252, "eval_samples_per_second": 1.045, "eval_steps_per_second": 0.033, "eval_wer": 12.49403077209779, "step": 8661 }, { "epoch": 6.000692760651195, "grad_norm": 0.18963384628295898, "learning_rate": 4.000000000000001e-06, "loss": 0.0033, "step": 8662 }, { "epoch": 6.00138552130239, "grad_norm": 0.21622170507907867, "learning_rate": 3.999306518723995e-06, "loss": 0.0042, "step": 8663 }, { "epoch": 6.002078281953585, "grad_norm": 0.3468954563140869, "learning_rate": 3.998613037447989e-06, "loss": 0.0066, "step": 8664 }, { "epoch": 6.00277104260478, "grad_norm": 0.1983821988105774, "learning_rate": 3.997919556171984e-06, "loss": 0.0039, "step": 8665 }, { "epoch": 6.003463803255975, "grad_norm": 0.2540256381034851, "learning_rate": 3.997226074895978e-06, "loss": 0.0056, "step": 8666 }, { "epoch": 6.00415656390717, "grad_norm": 0.22275549173355103, "learning_rate": 3.996532593619973e-06, "loss": 0.0043, "step": 8667 }, { "epoch": 6.004849324558365, "grad_norm": 0.31321004033088684, "learning_rate": 3.995839112343967e-06, "loss": 0.004, "step": 8668 }, { "epoch": 6.0055420852095605, "grad_norm": 0.3807545006275177, "learning_rate": 3.995145631067962e-06, "loss": 0.0044, "step": 8669 }, { "epoch": 6.006234845860755, "grad_norm": 0.3322286903858185, "learning_rate": 3.994452149791956e-06, "loss": 0.0058, "step": 8670 }, { "epoch": 6.00692760651195, "grad_norm": 0.2569510340690613, "learning_rate": 3.99375866851595e-06, "loss": 0.0037, "step": 8671 }, { "epoch": 6.007620367163145, "grad_norm": 0.17756123840808868, "learning_rate": 3.993065187239945e-06, "loss": 0.0038, "step": 8672 }, { "epoch": 6.00831312781434, "grad_norm": 0.35195431113243103, "learning_rate": 3.992371705963939e-06, "loss": 0.0063, "step": 8673 }, { "epoch": 6.009005888465535, "grad_norm": 0.1642107516527176, "learning_rate": 3.991678224687934e-06, "loss": 0.0029, "step": 8674 }, { "epoch": 6.00969864911673, "grad_norm": 0.2845403850078583, "learning_rate": 3.990984743411928e-06, "loss": 0.004, "step": 8675 }, { "epoch": 6.010391409767925, "grad_norm": 0.22731952369213104, "learning_rate": 3.990291262135922e-06, "loss": 0.0045, "step": 8676 }, { "epoch": 6.01108417041912, "grad_norm": 0.3325885236263275, "learning_rate": 3.989597780859917e-06, "loss": 0.0041, "step": 8677 }, { "epoch": 6.011776931070315, "grad_norm": 0.20064549148082733, "learning_rate": 3.988904299583912e-06, "loss": 0.0038, "step": 8678 }, { "epoch": 6.012469691721511, "grad_norm": 0.1457013040781021, "learning_rate": 3.988210818307906e-06, "loss": 0.0029, "step": 8679 }, { "epoch": 6.013162452372705, "grad_norm": 0.171246737241745, "learning_rate": 3.9875173370319e-06, "loss": 0.0026, "step": 8680 }, { "epoch": 6.0138552130239, "grad_norm": 0.1949104517698288, "learning_rate": 3.986823855755895e-06, "loss": 0.0032, "step": 8681 }, { "epoch": 6.0145479736750955, "grad_norm": 0.1342078000307083, "learning_rate": 3.98613037447989e-06, "loss": 0.003, "step": 8682 }, { "epoch": 6.01524073432629, "grad_norm": 0.3117418885231018, "learning_rate": 3.985436893203884e-06, "loss": 0.0052, "step": 8683 }, { "epoch": 6.015933494977485, "grad_norm": 0.28929945826530457, "learning_rate": 3.984743411927878e-06, "loss": 0.005, "step": 8684 }, { "epoch": 6.0166262556286805, "grad_norm": 0.32413914799690247, "learning_rate": 3.984049930651872e-06, "loss": 0.0054, "step": 8685 }, { "epoch": 6.017319016279875, "grad_norm": 0.3699895739555359, "learning_rate": 3.983356449375867e-06, "loss": 0.004, "step": 8686 }, { "epoch": 6.01801177693107, "grad_norm": 0.23407377302646637, "learning_rate": 3.982662968099862e-06, "loss": 0.0032, "step": 8687 }, { "epoch": 6.018704537582265, "grad_norm": 0.2936857342720032, "learning_rate": 3.981969486823856e-06, "loss": 0.0044, "step": 8688 }, { "epoch": 6.019397298233461, "grad_norm": 0.43710726499557495, "learning_rate": 3.98127600554785e-06, "loss": 0.0044, "step": 8689 }, { "epoch": 6.020090058884655, "grad_norm": 0.2992812693119049, "learning_rate": 3.980582524271845e-06, "loss": 0.0057, "step": 8690 }, { "epoch": 6.02078281953585, "grad_norm": 0.3120293617248535, "learning_rate": 3.979889042995839e-06, "loss": 0.0039, "step": 8691 }, { "epoch": 6.021475580187046, "grad_norm": 0.18237251043319702, "learning_rate": 3.979195561719834e-06, "loss": 0.0035, "step": 8692 }, { "epoch": 6.02216834083824, "grad_norm": 0.1504957675933838, "learning_rate": 3.978502080443828e-06, "loss": 0.0023, "step": 8693 }, { "epoch": 6.022861101489435, "grad_norm": 0.5236929655075073, "learning_rate": 3.9778085991678224e-06, "loss": 0.0043, "step": 8694 }, { "epoch": 6.023553862140631, "grad_norm": 0.08897154033184052, "learning_rate": 3.977115117891817e-06, "loss": 0.0024, "step": 8695 }, { "epoch": 6.024246622791825, "grad_norm": 0.24160565435886383, "learning_rate": 3.9764216366158115e-06, "loss": 0.0035, "step": 8696 }, { "epoch": 6.02493938344302, "grad_norm": 0.21429160237312317, "learning_rate": 3.975728155339806e-06, "loss": 0.0033, "step": 8697 }, { "epoch": 6.0256321440942155, "grad_norm": 0.19688105583190918, "learning_rate": 3.9750346740638005e-06, "loss": 0.0033, "step": 8698 }, { "epoch": 6.026324904745411, "grad_norm": 0.14925949275493622, "learning_rate": 3.974341192787795e-06, "loss": 0.0029, "step": 8699 }, { "epoch": 6.027017665396605, "grad_norm": 0.33217620849609375, "learning_rate": 3.9736477115117895e-06, "loss": 0.0042, "step": 8700 }, { "epoch": 6.0277104260478005, "grad_norm": 0.3502248227596283, "learning_rate": 3.972954230235784e-06, "loss": 0.0101, "step": 8701 }, { "epoch": 6.028403186698996, "grad_norm": 0.1795787215232849, "learning_rate": 3.9722607489597785e-06, "loss": 0.0033, "step": 8702 }, { "epoch": 6.02909594735019, "grad_norm": 0.2041216939687729, "learning_rate": 3.971567267683773e-06, "loss": 0.0039, "step": 8703 }, { "epoch": 6.0297887080013854, "grad_norm": 0.19797015190124512, "learning_rate": 3.9708737864077675e-06, "loss": 0.0034, "step": 8704 }, { "epoch": 6.030481468652581, "grad_norm": 0.2640591561794281, "learning_rate": 3.970180305131762e-06, "loss": 0.0039, "step": 8705 }, { "epoch": 6.031174229303775, "grad_norm": 0.2120644450187683, "learning_rate": 3.969486823855756e-06, "loss": 0.004, "step": 8706 }, { "epoch": 6.03186698995497, "grad_norm": 0.4690014719963074, "learning_rate": 3.968793342579751e-06, "loss": 0.0065, "step": 8707 }, { "epoch": 6.032559750606166, "grad_norm": 0.24471673369407654, "learning_rate": 3.968099861303746e-06, "loss": 0.0034, "step": 8708 }, { "epoch": 6.033252511257361, "grad_norm": 0.2979377806186676, "learning_rate": 3.96740638002774e-06, "loss": 0.0059, "step": 8709 }, { "epoch": 6.033945271908555, "grad_norm": 0.14429903030395508, "learning_rate": 3.966712898751734e-06, "loss": 0.0031, "step": 8710 }, { "epoch": 6.034638032559751, "grad_norm": 0.2544229328632355, "learning_rate": 3.966019417475729e-06, "loss": 0.0038, "step": 8711 }, { "epoch": 6.035330793210946, "grad_norm": 0.13747026026248932, "learning_rate": 3.965325936199723e-06, "loss": 0.0029, "step": 8712 }, { "epoch": 6.03602355386214, "grad_norm": 0.3124599754810333, "learning_rate": 3.964632454923718e-06, "loss": 0.005, "step": 8713 }, { "epoch": 6.036716314513336, "grad_norm": 0.14773334562778473, "learning_rate": 3.963938973647712e-06, "loss": 0.003, "step": 8714 }, { "epoch": 6.037409075164531, "grad_norm": 0.40750443935394287, "learning_rate": 3.963245492371706e-06, "loss": 0.004, "step": 8715 }, { "epoch": 6.038101835815725, "grad_norm": 0.1526937633752823, "learning_rate": 3.962552011095701e-06, "loss": 0.0025, "step": 8716 }, { "epoch": 6.0387945964669205, "grad_norm": 0.17284125089645386, "learning_rate": 3.961858529819696e-06, "loss": 0.0035, "step": 8717 }, { "epoch": 6.039487357118116, "grad_norm": 0.3093487620353699, "learning_rate": 3.96116504854369e-06, "loss": 0.0038, "step": 8718 }, { "epoch": 6.040180117769311, "grad_norm": 0.4244932532310486, "learning_rate": 3.960471567267684e-06, "loss": 0.0062, "step": 8719 }, { "epoch": 6.0408728784205055, "grad_norm": 0.20396049320697784, "learning_rate": 3.959778085991679e-06, "loss": 0.0034, "step": 8720 }, { "epoch": 6.041565639071701, "grad_norm": 0.20464245975017548, "learning_rate": 3.959084604715673e-06, "loss": 0.0033, "step": 8721 }, { "epoch": 6.042258399722896, "grad_norm": 0.17733533680438995, "learning_rate": 3.958391123439668e-06, "loss": 0.0034, "step": 8722 }, { "epoch": 6.04295116037409, "grad_norm": 0.5362047553062439, "learning_rate": 3.957697642163662e-06, "loss": 0.0059, "step": 8723 }, { "epoch": 6.043643921025286, "grad_norm": 0.19199024140834808, "learning_rate": 3.957004160887656e-06, "loss": 0.0035, "step": 8724 }, { "epoch": 6.044336681676481, "grad_norm": 0.2026488333940506, "learning_rate": 3.956310679611651e-06, "loss": 0.0034, "step": 8725 }, { "epoch": 6.045029442327675, "grad_norm": 0.34202784299850464, "learning_rate": 3.955617198335645e-06, "loss": 0.006, "step": 8726 }, { "epoch": 6.045722202978871, "grad_norm": 0.18430756032466888, "learning_rate": 3.95492371705964e-06, "loss": 0.0028, "step": 8727 }, { "epoch": 6.046414963630066, "grad_norm": 0.2855680584907532, "learning_rate": 3.954230235783634e-06, "loss": 0.0046, "step": 8728 }, { "epoch": 6.047107724281261, "grad_norm": 0.20251581072807312, "learning_rate": 3.953536754507629e-06, "loss": 0.0026, "step": 8729 }, { "epoch": 6.047800484932456, "grad_norm": 0.35224398970603943, "learning_rate": 3.952843273231623e-06, "loss": 0.004, "step": 8730 }, { "epoch": 6.048493245583651, "grad_norm": 0.23019564151763916, "learning_rate": 3.952149791955617e-06, "loss": 0.0033, "step": 8731 }, { "epoch": 6.049186006234846, "grad_norm": 0.20461471378803253, "learning_rate": 3.951456310679612e-06, "loss": 0.0033, "step": 8732 }, { "epoch": 6.0498787668860405, "grad_norm": 0.482501745223999, "learning_rate": 3.950762829403606e-06, "loss": 0.0077, "step": 8733 }, { "epoch": 6.050571527537236, "grad_norm": 0.1371850222349167, "learning_rate": 3.950069348127601e-06, "loss": 0.0029, "step": 8734 }, { "epoch": 6.051264288188431, "grad_norm": 0.26176711916923523, "learning_rate": 3.949375866851595e-06, "loss": 0.0048, "step": 8735 }, { "epoch": 6.0519570488396255, "grad_norm": 0.17871302366256714, "learning_rate": 3.948682385575589e-06, "loss": 0.0031, "step": 8736 }, { "epoch": 6.052649809490821, "grad_norm": 0.353868305683136, "learning_rate": 3.947988904299584e-06, "loss": 0.0034, "step": 8737 }, { "epoch": 6.053342570142016, "grad_norm": 0.15032900869846344, "learning_rate": 3.947295423023579e-06, "loss": 0.0034, "step": 8738 }, { "epoch": 6.054035330793211, "grad_norm": 0.19332966208457947, "learning_rate": 3.946601941747573e-06, "loss": 0.0035, "step": 8739 }, { "epoch": 6.054728091444406, "grad_norm": 0.14572642743587494, "learning_rate": 3.945908460471567e-06, "loss": 0.003, "step": 8740 }, { "epoch": 6.055420852095601, "grad_norm": 0.1290379911661148, "learning_rate": 3.9452149791955614e-06, "loss": 0.0026, "step": 8741 }, { "epoch": 6.056113612746796, "grad_norm": 0.1428215503692627, "learning_rate": 3.944521497919556e-06, "loss": 0.0024, "step": 8742 }, { "epoch": 6.056806373397991, "grad_norm": 0.27382057905197144, "learning_rate": 3.943828016643551e-06, "loss": 0.0039, "step": 8743 }, { "epoch": 6.057499134049186, "grad_norm": 0.1334439218044281, "learning_rate": 3.943134535367545e-06, "loss": 0.0025, "step": 8744 }, { "epoch": 6.058191894700381, "grad_norm": 0.33077719807624817, "learning_rate": 3.9424410540915395e-06, "loss": 0.0048, "step": 8745 }, { "epoch": 6.058884655351576, "grad_norm": 0.21852080523967743, "learning_rate": 3.941747572815534e-06, "loss": 0.0034, "step": 8746 }, { "epoch": 6.059577416002771, "grad_norm": 0.1549639254808426, "learning_rate": 3.941054091539529e-06, "loss": 0.0033, "step": 8747 }, { "epoch": 6.060270176653966, "grad_norm": 0.31655940413475037, "learning_rate": 3.9403606102635234e-06, "loss": 0.0054, "step": 8748 }, { "epoch": 6.0609629373051614, "grad_norm": 0.2889525890350342, "learning_rate": 3.9396671289875175e-06, "loss": 0.0037, "step": 8749 }, { "epoch": 6.061655697956356, "grad_norm": 0.25274062156677246, "learning_rate": 3.938973647711512e-06, "loss": 0.0039, "step": 8750 }, { "epoch": 6.062348458607551, "grad_norm": 0.26409873366355896, "learning_rate": 3.9382801664355065e-06, "loss": 0.0045, "step": 8751 }, { "epoch": 6.063041219258746, "grad_norm": 0.3435945212841034, "learning_rate": 3.9375866851595015e-06, "loss": 0.0041, "step": 8752 }, { "epoch": 6.063733979909941, "grad_norm": 0.15153640508651733, "learning_rate": 3.9368932038834956e-06, "loss": 0.0042, "step": 8753 }, { "epoch": 6.064426740561136, "grad_norm": 0.1768701821565628, "learning_rate": 3.93619972260749e-06, "loss": 0.0034, "step": 8754 }, { "epoch": 6.065119501212331, "grad_norm": 0.2529245913028717, "learning_rate": 3.935506241331485e-06, "loss": 0.0053, "step": 8755 }, { "epoch": 6.065812261863526, "grad_norm": 0.35842397809028625, "learning_rate": 3.934812760055479e-06, "loss": 0.0035, "step": 8756 }, { "epoch": 6.066505022514721, "grad_norm": 0.17603647708892822, "learning_rate": 3.934119278779474e-06, "loss": 0.0028, "step": 8757 }, { "epoch": 6.067197783165916, "grad_norm": 0.36040037870407104, "learning_rate": 3.933425797503468e-06, "loss": 0.0038, "step": 8758 }, { "epoch": 6.067890543817112, "grad_norm": 0.2548173666000366, "learning_rate": 3.932732316227463e-06, "loss": 0.0035, "step": 8759 }, { "epoch": 6.068583304468306, "grad_norm": 0.24014891684055328, "learning_rate": 3.932038834951457e-06, "loss": 0.0052, "step": 8760 }, { "epoch": 6.069276065119501, "grad_norm": 0.14708179235458374, "learning_rate": 3.931345353675451e-06, "loss": 0.0032, "step": 8761 }, { "epoch": 6.0699688257706965, "grad_norm": 0.2969667911529541, "learning_rate": 3.930651872399446e-06, "loss": 0.0059, "step": 8762 }, { "epoch": 6.070661586421891, "grad_norm": 0.3556962311267853, "learning_rate": 3.92995839112344e-06, "loss": 0.005, "step": 8763 }, { "epoch": 6.071354347073086, "grad_norm": 0.21978433430194855, "learning_rate": 3.929264909847435e-06, "loss": 0.0033, "step": 8764 }, { "epoch": 6.0720471077242815, "grad_norm": 0.1866694539785385, "learning_rate": 3.928571428571429e-06, "loss": 0.0038, "step": 8765 }, { "epoch": 6.072739868375476, "grad_norm": 0.1878751814365387, "learning_rate": 3.927877947295423e-06, "loss": 0.0034, "step": 8766 }, { "epoch": 6.073432629026671, "grad_norm": 0.2639264166355133, "learning_rate": 3.927184466019418e-06, "loss": 0.0049, "step": 8767 }, { "epoch": 6.074125389677866, "grad_norm": 0.16790176928043365, "learning_rate": 3.926490984743413e-06, "loss": 0.0035, "step": 8768 }, { "epoch": 6.074818150329062, "grad_norm": 0.21823951601982117, "learning_rate": 3.925797503467407e-06, "loss": 0.0044, "step": 8769 }, { "epoch": 6.075510910980256, "grad_norm": 0.25152066349983215, "learning_rate": 3.925104022191401e-06, "loss": 0.0029, "step": 8770 }, { "epoch": 6.076203671631451, "grad_norm": 0.27631130814552307, "learning_rate": 3.924410540915395e-06, "loss": 0.0051, "step": 8771 }, { "epoch": 6.076896432282647, "grad_norm": 0.18931017816066742, "learning_rate": 3.92371705963939e-06, "loss": 0.0028, "step": 8772 }, { "epoch": 6.077589192933841, "grad_norm": 0.15689823031425476, "learning_rate": 3.923023578363385e-06, "loss": 0.0028, "step": 8773 }, { "epoch": 6.078281953585036, "grad_norm": 0.15919512510299683, "learning_rate": 3.922330097087379e-06, "loss": 0.0034, "step": 8774 }, { "epoch": 6.078974714236232, "grad_norm": 0.2085564285516739, "learning_rate": 3.921636615811373e-06, "loss": 0.0049, "step": 8775 }, { "epoch": 6.079667474887426, "grad_norm": 0.20154698193073273, "learning_rate": 3.920943134535368e-06, "loss": 0.004, "step": 8776 }, { "epoch": 6.080360235538621, "grad_norm": 0.32937222719192505, "learning_rate": 3.920249653259363e-06, "loss": 0.0074, "step": 8777 }, { "epoch": 6.0810529961898165, "grad_norm": 0.36560726165771484, "learning_rate": 3.919556171983357e-06, "loss": 0.0038, "step": 8778 }, { "epoch": 6.081745756841012, "grad_norm": 0.15626446902751923, "learning_rate": 3.918862690707351e-06, "loss": 0.0027, "step": 8779 }, { "epoch": 6.082438517492206, "grad_norm": 0.5248779654502869, "learning_rate": 3.918169209431345e-06, "loss": 0.003, "step": 8780 }, { "epoch": 6.0831312781434015, "grad_norm": 0.1562829613685608, "learning_rate": 3.91747572815534e-06, "loss": 0.0038, "step": 8781 }, { "epoch": 6.083824038794597, "grad_norm": 0.16246558725833893, "learning_rate": 3.916782246879335e-06, "loss": 0.0033, "step": 8782 }, { "epoch": 6.084516799445791, "grad_norm": 0.11507923901081085, "learning_rate": 3.916088765603329e-06, "loss": 0.0028, "step": 8783 }, { "epoch": 6.085209560096986, "grad_norm": 0.26600173115730286, "learning_rate": 3.915395284327323e-06, "loss": 0.0047, "step": 8784 }, { "epoch": 6.085902320748182, "grad_norm": 0.26927924156188965, "learning_rate": 3.914701803051318e-06, "loss": 0.0036, "step": 8785 }, { "epoch": 6.086595081399376, "grad_norm": 0.3145221173763275, "learning_rate": 3.914008321775312e-06, "loss": 0.0034, "step": 8786 }, { "epoch": 6.087287842050571, "grad_norm": 0.27800190448760986, "learning_rate": 3.913314840499307e-06, "loss": 0.0029, "step": 8787 }, { "epoch": 6.087980602701767, "grad_norm": 0.19154535233974457, "learning_rate": 3.912621359223301e-06, "loss": 0.0036, "step": 8788 }, { "epoch": 6.088673363352962, "grad_norm": 0.8603630065917969, "learning_rate": 3.911927877947295e-06, "loss": 0.0046, "step": 8789 }, { "epoch": 6.089366124004156, "grad_norm": 0.3638884127140045, "learning_rate": 3.91123439667129e-06, "loss": 0.003, "step": 8790 }, { "epoch": 6.090058884655352, "grad_norm": 0.19491951167583466, "learning_rate": 3.910540915395284e-06, "loss": 0.0037, "step": 8791 }, { "epoch": 6.090751645306547, "grad_norm": 0.22996185719966888, "learning_rate": 3.909847434119279e-06, "loss": 0.0034, "step": 8792 }, { "epoch": 6.091444405957741, "grad_norm": 0.29953622817993164, "learning_rate": 3.909153952843273e-06, "loss": 0.0026, "step": 8793 }, { "epoch": 6.092137166608937, "grad_norm": 0.1870751529932022, "learning_rate": 3.908460471567268e-06, "loss": 0.0029, "step": 8794 }, { "epoch": 6.092829927260132, "grad_norm": 0.20994138717651367, "learning_rate": 3.9077669902912624e-06, "loss": 0.0039, "step": 8795 }, { "epoch": 6.093522687911326, "grad_norm": 0.1280861347913742, "learning_rate": 3.9070735090152565e-06, "loss": 0.0029, "step": 8796 }, { "epoch": 6.0942154485625215, "grad_norm": 0.33243700861930847, "learning_rate": 3.9063800277392515e-06, "loss": 0.0038, "step": 8797 }, { "epoch": 6.094908209213717, "grad_norm": 0.21579419076442719, "learning_rate": 3.905686546463246e-06, "loss": 0.0027, "step": 8798 }, { "epoch": 6.095600969864912, "grad_norm": 0.3789650499820709, "learning_rate": 3.9049930651872405e-06, "loss": 0.0067, "step": 8799 }, { "epoch": 6.0962937305161065, "grad_norm": 0.25341564416885376, "learning_rate": 3.9042995839112346e-06, "loss": 0.0032, "step": 8800 }, { "epoch": 6.096986491167302, "grad_norm": 0.27467408776283264, "learning_rate": 3.903606102635229e-06, "loss": 0.0049, "step": 8801 }, { "epoch": 6.097679251818497, "grad_norm": 0.14345599710941315, "learning_rate": 3.902912621359224e-06, "loss": 0.0026, "step": 8802 }, { "epoch": 6.098372012469691, "grad_norm": 0.2033206969499588, "learning_rate": 3.9022191400832185e-06, "loss": 0.0032, "step": 8803 }, { "epoch": 6.099064773120887, "grad_norm": 0.17043140530586243, "learning_rate": 3.901525658807213e-06, "loss": 0.0031, "step": 8804 }, { "epoch": 6.099757533772082, "grad_norm": 0.23245030641555786, "learning_rate": 3.900832177531207e-06, "loss": 0.0038, "step": 8805 }, { "epoch": 6.100450294423276, "grad_norm": 0.27172887325286865, "learning_rate": 3.900138696255202e-06, "loss": 0.0035, "step": 8806 }, { "epoch": 6.101143055074472, "grad_norm": 0.17954404652118683, "learning_rate": 3.8994452149791966e-06, "loss": 0.003, "step": 8807 }, { "epoch": 6.101835815725667, "grad_norm": 0.20926862955093384, "learning_rate": 3.898751733703191e-06, "loss": 0.003, "step": 8808 }, { "epoch": 6.102528576376862, "grad_norm": 0.27428656816482544, "learning_rate": 3.898058252427185e-06, "loss": 0.004, "step": 8809 }, { "epoch": 6.103221337028057, "grad_norm": 0.22884447872638702, "learning_rate": 3.897364771151179e-06, "loss": 0.0026, "step": 8810 }, { "epoch": 6.103914097679252, "grad_norm": 0.13015517592430115, "learning_rate": 3.896671289875174e-06, "loss": 0.0027, "step": 8811 }, { "epoch": 6.104606858330447, "grad_norm": 0.19904249906539917, "learning_rate": 3.895977808599169e-06, "loss": 0.003, "step": 8812 }, { "epoch": 6.1052996189816415, "grad_norm": 0.1918070763349533, "learning_rate": 3.895284327323163e-06, "loss": 0.0039, "step": 8813 }, { "epoch": 6.105992379632837, "grad_norm": 0.15057818591594696, "learning_rate": 3.894590846047157e-06, "loss": 0.0032, "step": 8814 }, { "epoch": 6.106685140284032, "grad_norm": 0.09813568741083145, "learning_rate": 3.893897364771152e-06, "loss": 0.002, "step": 8815 }, { "epoch": 6.1073779009352265, "grad_norm": 0.45844826102256775, "learning_rate": 3.893203883495146e-06, "loss": 0.0032, "step": 8816 }, { "epoch": 6.108070661586422, "grad_norm": 0.2586424648761749, "learning_rate": 3.892510402219141e-06, "loss": 0.0039, "step": 8817 }, { "epoch": 6.108763422237617, "grad_norm": 0.184055358171463, "learning_rate": 3.891816920943135e-06, "loss": 0.0032, "step": 8818 }, { "epoch": 6.109456182888812, "grad_norm": 0.23174357414245605, "learning_rate": 3.891123439667129e-06, "loss": 0.0031, "step": 8819 }, { "epoch": 6.110148943540007, "grad_norm": 0.23705334961414337, "learning_rate": 3.890429958391124e-06, "loss": 0.0026, "step": 8820 }, { "epoch": 6.110841704191202, "grad_norm": 0.12069892883300781, "learning_rate": 3.889736477115118e-06, "loss": 0.0026, "step": 8821 }, { "epoch": 6.111534464842397, "grad_norm": 0.19660265743732452, "learning_rate": 3.889042995839113e-06, "loss": 0.0033, "step": 8822 }, { "epoch": 6.112227225493592, "grad_norm": 0.2262856662273407, "learning_rate": 3.888349514563107e-06, "loss": 0.0028, "step": 8823 }, { "epoch": 6.112919986144787, "grad_norm": 0.1962341070175171, "learning_rate": 3.887656033287102e-06, "loss": 0.0032, "step": 8824 }, { "epoch": 6.113612746795982, "grad_norm": 0.2887985408306122, "learning_rate": 3.886962552011096e-06, "loss": 0.0033, "step": 8825 }, { "epoch": 6.114305507447177, "grad_norm": 0.17293819785118103, "learning_rate": 3.88626907073509e-06, "loss": 0.0033, "step": 8826 }, { "epoch": 6.114998268098372, "grad_norm": 0.47911661863327026, "learning_rate": 3.885575589459085e-06, "loss": 0.0052, "step": 8827 }, { "epoch": 6.115691028749567, "grad_norm": 0.24279531836509705, "learning_rate": 3.884882108183079e-06, "loss": 0.0036, "step": 8828 }, { "epoch": 6.116383789400762, "grad_norm": 0.19363507628440857, "learning_rate": 3.884188626907074e-06, "loss": 0.003, "step": 8829 }, { "epoch": 6.117076550051957, "grad_norm": 0.2090897113084793, "learning_rate": 3.883495145631068e-06, "loss": 0.0029, "step": 8830 }, { "epoch": 6.117769310703152, "grad_norm": 0.512018620967865, "learning_rate": 3.882801664355062e-06, "loss": 0.0063, "step": 8831 }, { "epoch": 6.118462071354347, "grad_norm": 0.2961897552013397, "learning_rate": 3.882108183079057e-06, "loss": 0.0036, "step": 8832 }, { "epoch": 6.119154832005542, "grad_norm": 0.12247592955827713, "learning_rate": 3.881414701803052e-06, "loss": 0.0027, "step": 8833 }, { "epoch": 6.119847592656737, "grad_norm": 0.4169470965862274, "learning_rate": 3.880721220527046e-06, "loss": 0.0041, "step": 8834 }, { "epoch": 6.120540353307932, "grad_norm": 0.15056347846984863, "learning_rate": 3.88002773925104e-06, "loss": 0.0025, "step": 8835 }, { "epoch": 6.121233113959127, "grad_norm": 0.36095133423805237, "learning_rate": 3.879334257975035e-06, "loss": 0.0035, "step": 8836 }, { "epoch": 6.121925874610322, "grad_norm": 0.2863277792930603, "learning_rate": 3.878640776699029e-06, "loss": 0.0059, "step": 8837 }, { "epoch": 6.122618635261517, "grad_norm": 0.36874857544898987, "learning_rate": 3.877947295423024e-06, "loss": 0.0055, "step": 8838 }, { "epoch": 6.123311395912713, "grad_norm": 0.1970696896314621, "learning_rate": 3.877253814147018e-06, "loss": 0.0035, "step": 8839 }, { "epoch": 6.124004156563907, "grad_norm": 0.34227070212364197, "learning_rate": 3.876560332871012e-06, "loss": 0.004, "step": 8840 }, { "epoch": 6.124696917215102, "grad_norm": 0.13455091416835785, "learning_rate": 3.875866851595007e-06, "loss": 0.002, "step": 8841 }, { "epoch": 6.1253896778662975, "grad_norm": 0.17953209578990936, "learning_rate": 3.875173370319002e-06, "loss": 0.0027, "step": 8842 }, { "epoch": 6.126082438517492, "grad_norm": 0.23272685706615448, "learning_rate": 3.874479889042996e-06, "loss": 0.0034, "step": 8843 }, { "epoch": 6.126775199168687, "grad_norm": 0.14728665351867676, "learning_rate": 3.8737864077669905e-06, "loss": 0.0026, "step": 8844 }, { "epoch": 6.1274679598198825, "grad_norm": 0.27838802337646484, "learning_rate": 3.873092926490985e-06, "loss": 0.0037, "step": 8845 }, { "epoch": 6.128160720471077, "grad_norm": 0.26910194754600525, "learning_rate": 3.8723994452149795e-06, "loss": 0.0069, "step": 8846 }, { "epoch": 6.128853481122272, "grad_norm": 0.18215250968933105, "learning_rate": 3.8717059639389736e-06, "loss": 0.0028, "step": 8847 }, { "epoch": 6.129546241773467, "grad_norm": 0.26187875866889954, "learning_rate": 3.8710124826629685e-06, "loss": 0.0047, "step": 8848 }, { "epoch": 6.130239002424663, "grad_norm": 0.4058743417263031, "learning_rate": 3.870319001386963e-06, "loss": 0.0051, "step": 8849 }, { "epoch": 6.130931763075857, "grad_norm": 0.2035028338432312, "learning_rate": 3.8696255201109575e-06, "loss": 0.0032, "step": 8850 }, { "epoch": 6.131624523727052, "grad_norm": 0.34671515226364136, "learning_rate": 3.868932038834952e-06, "loss": 0.0033, "step": 8851 }, { "epoch": 6.132317284378248, "grad_norm": 0.27328479290008545, "learning_rate": 3.868238557558946e-06, "loss": 0.0042, "step": 8852 }, { "epoch": 6.133010045029442, "grad_norm": 0.16158245503902435, "learning_rate": 3.867545076282941e-06, "loss": 0.0039, "step": 8853 }, { "epoch": 6.133702805680637, "grad_norm": 0.2262895405292511, "learning_rate": 3.8668515950069356e-06, "loss": 0.0042, "step": 8854 }, { "epoch": 6.134395566331833, "grad_norm": 0.19910332560539246, "learning_rate": 3.86615811373093e-06, "loss": 0.0027, "step": 8855 }, { "epoch": 6.135088326983027, "grad_norm": 0.332500696182251, "learning_rate": 3.865464632454924e-06, "loss": 0.0065, "step": 8856 }, { "epoch": 6.135781087634222, "grad_norm": 0.20140956342220306, "learning_rate": 3.864771151178918e-06, "loss": 0.0036, "step": 8857 }, { "epoch": 6.1364738482854175, "grad_norm": 0.18646806478500366, "learning_rate": 3.864077669902913e-06, "loss": 0.0036, "step": 8858 }, { "epoch": 6.137166608936613, "grad_norm": 0.3805655539035797, "learning_rate": 3.863384188626908e-06, "loss": 0.0036, "step": 8859 }, { "epoch": 6.137859369587807, "grad_norm": 0.3395026922225952, "learning_rate": 3.862690707350902e-06, "loss": 0.0049, "step": 8860 }, { "epoch": 6.1385521302390025, "grad_norm": 0.14605139195919037, "learning_rate": 3.861997226074896e-06, "loss": 0.0024, "step": 8861 }, { "epoch": 6.139244890890198, "grad_norm": 0.22433006763458252, "learning_rate": 3.861303744798891e-06, "loss": 0.0035, "step": 8862 }, { "epoch": 6.139937651541392, "grad_norm": 0.20515076816082, "learning_rate": 3.860610263522886e-06, "loss": 0.0033, "step": 8863 }, { "epoch": 6.140630412192587, "grad_norm": 0.18175749480724335, "learning_rate": 3.85991678224688e-06, "loss": 0.0031, "step": 8864 }, { "epoch": 6.141323172843783, "grad_norm": 0.2024599313735962, "learning_rate": 3.859223300970874e-06, "loss": 0.0034, "step": 8865 }, { "epoch": 6.142015933494977, "grad_norm": 0.21010836958885193, "learning_rate": 3.858529819694868e-06, "loss": 0.0035, "step": 8866 }, { "epoch": 6.142708694146172, "grad_norm": 0.15664875507354736, "learning_rate": 3.857836338418863e-06, "loss": 0.0035, "step": 8867 }, { "epoch": 6.143401454797368, "grad_norm": 0.21486541628837585, "learning_rate": 3.857142857142858e-06, "loss": 0.0031, "step": 8868 }, { "epoch": 6.144094215448563, "grad_norm": 0.25232917070388794, "learning_rate": 3.856449375866852e-06, "loss": 0.0053, "step": 8869 }, { "epoch": 6.144786976099757, "grad_norm": 0.2670959234237671, "learning_rate": 3.855755894590846e-06, "loss": 0.003, "step": 8870 }, { "epoch": 6.145479736750953, "grad_norm": 0.16176125407218933, "learning_rate": 3.855062413314841e-06, "loss": 0.0033, "step": 8871 }, { "epoch": 6.146172497402148, "grad_norm": 0.16566051542758942, "learning_rate": 3.854368932038835e-06, "loss": 0.0036, "step": 8872 }, { "epoch": 6.146865258053342, "grad_norm": 0.297456294298172, "learning_rate": 3.85367545076283e-06, "loss": 0.0037, "step": 8873 }, { "epoch": 6.1475580187045376, "grad_norm": 0.16764581203460693, "learning_rate": 3.852981969486824e-06, "loss": 0.0034, "step": 8874 }, { "epoch": 6.148250779355733, "grad_norm": 0.2256138175725937, "learning_rate": 3.852288488210819e-06, "loss": 0.0033, "step": 8875 }, { "epoch": 6.148943540006927, "grad_norm": 0.23719216883182526, "learning_rate": 3.851595006934813e-06, "loss": 0.0033, "step": 8876 }, { "epoch": 6.1496363006581225, "grad_norm": 0.4487541913986206, "learning_rate": 3.850901525658807e-06, "loss": 0.0029, "step": 8877 }, { "epoch": 6.150329061309318, "grad_norm": 0.14292246103286743, "learning_rate": 3.850208044382802e-06, "loss": 0.0029, "step": 8878 }, { "epoch": 6.151021821960513, "grad_norm": 0.25498414039611816, "learning_rate": 3.849514563106796e-06, "loss": 0.004, "step": 8879 }, { "epoch": 6.1517145826117074, "grad_norm": 0.19159753620624542, "learning_rate": 3.848821081830791e-06, "loss": 0.0032, "step": 8880 }, { "epoch": 6.152407343262903, "grad_norm": 0.24804987013339996, "learning_rate": 3.848127600554785e-06, "loss": 0.0028, "step": 8881 }, { "epoch": 6.153100103914098, "grad_norm": 0.2274547666311264, "learning_rate": 3.847434119278779e-06, "loss": 0.0027, "step": 8882 }, { "epoch": 6.153792864565292, "grad_norm": 0.3069051206111908, "learning_rate": 3.846740638002774e-06, "loss": 0.0063, "step": 8883 }, { "epoch": 6.154485625216488, "grad_norm": 0.15738233923912048, "learning_rate": 3.846047156726769e-06, "loss": 0.0033, "step": 8884 }, { "epoch": 6.155178385867683, "grad_norm": 0.23025180399417877, "learning_rate": 3.845353675450763e-06, "loss": 0.0031, "step": 8885 }, { "epoch": 6.155871146518877, "grad_norm": 0.18047180771827698, "learning_rate": 3.844660194174757e-06, "loss": 0.0035, "step": 8886 }, { "epoch": 6.156563907170073, "grad_norm": 0.18933671712875366, "learning_rate": 3.843966712898751e-06, "loss": 0.003, "step": 8887 }, { "epoch": 6.157256667821268, "grad_norm": 0.9460041522979736, "learning_rate": 3.843273231622746e-06, "loss": 0.0063, "step": 8888 }, { "epoch": 6.157949428472463, "grad_norm": 0.16287913918495178, "learning_rate": 3.842579750346741e-06, "loss": 0.003, "step": 8889 }, { "epoch": 6.158642189123658, "grad_norm": 0.23413816094398499, "learning_rate": 3.841886269070735e-06, "loss": 0.0043, "step": 8890 }, { "epoch": 6.159334949774853, "grad_norm": 0.1540103405714035, "learning_rate": 3.8411927877947295e-06, "loss": 0.0027, "step": 8891 }, { "epoch": 6.160027710426048, "grad_norm": 0.22090907394886017, "learning_rate": 3.840499306518724e-06, "loss": 0.0036, "step": 8892 }, { "epoch": 6.1607204710772425, "grad_norm": 0.5158216953277588, "learning_rate": 3.839805825242719e-06, "loss": 0.0063, "step": 8893 }, { "epoch": 6.161413231728438, "grad_norm": 0.14484673738479614, "learning_rate": 3.839112343966713e-06, "loss": 0.0027, "step": 8894 }, { "epoch": 6.162105992379633, "grad_norm": 0.4610350430011749, "learning_rate": 3.8384188626907075e-06, "loss": 0.01, "step": 8895 }, { "epoch": 6.1627987530308275, "grad_norm": 0.1954057514667511, "learning_rate": 3.837725381414702e-06, "loss": 0.0023, "step": 8896 }, { "epoch": 6.163491513682023, "grad_norm": 0.1904076486825943, "learning_rate": 3.8370319001386965e-06, "loss": 0.0041, "step": 8897 }, { "epoch": 6.164184274333218, "grad_norm": 0.3598301112651825, "learning_rate": 3.8363384188626914e-06, "loss": 0.0023, "step": 8898 }, { "epoch": 6.164877034984413, "grad_norm": 0.3696455657482147, "learning_rate": 3.8356449375866855e-06, "loss": 0.0053, "step": 8899 }, { "epoch": 6.165569795635608, "grad_norm": 0.22022351622581482, "learning_rate": 3.83495145631068e-06, "loss": 0.0041, "step": 8900 }, { "epoch": 6.166262556286803, "grad_norm": 0.13488420844078064, "learning_rate": 3.8342579750346746e-06, "loss": 0.0026, "step": 8901 }, { "epoch": 6.166955316937998, "grad_norm": 0.18695005774497986, "learning_rate": 3.833564493758669e-06, "loss": 0.003, "step": 8902 }, { "epoch": 6.167648077589193, "grad_norm": 0.2914468050003052, "learning_rate": 3.8328710124826636e-06, "loss": 0.0055, "step": 8903 }, { "epoch": 6.168340838240388, "grad_norm": 0.28238797187805176, "learning_rate": 3.832177531206658e-06, "loss": 0.0054, "step": 8904 }, { "epoch": 6.169033598891583, "grad_norm": 0.46053773164749146, "learning_rate": 3.831484049930652e-06, "loss": 0.0053, "step": 8905 }, { "epoch": 6.169726359542778, "grad_norm": 0.23768968880176544, "learning_rate": 3.830790568654647e-06, "loss": 0.004, "step": 8906 }, { "epoch": 6.170419120193973, "grad_norm": 0.26686280965805054, "learning_rate": 3.830097087378641e-06, "loss": 0.0042, "step": 8907 }, { "epoch": 6.171111880845168, "grad_norm": 0.3420552611351013, "learning_rate": 3.829403606102636e-06, "loss": 0.0049, "step": 8908 }, { "epoch": 6.171804641496363, "grad_norm": 0.2604672312736511, "learning_rate": 3.82871012482663e-06, "loss": 0.0041, "step": 8909 }, { "epoch": 6.172497402147558, "grad_norm": 0.18973568081855774, "learning_rate": 3.828016643550625e-06, "loss": 0.0027, "step": 8910 }, { "epoch": 6.173190162798753, "grad_norm": 0.2012564092874527, "learning_rate": 3.827323162274619e-06, "loss": 0.0037, "step": 8911 }, { "epoch": 6.173882923449948, "grad_norm": 0.1997278928756714, "learning_rate": 3.826629680998613e-06, "loss": 0.0043, "step": 8912 }, { "epoch": 6.174575684101143, "grad_norm": 0.5193144679069519, "learning_rate": 3.825936199722608e-06, "loss": 0.0067, "step": 8913 }, { "epoch": 6.175268444752338, "grad_norm": 0.26644983887672424, "learning_rate": 3.825242718446602e-06, "loss": 0.0036, "step": 8914 }, { "epoch": 6.175961205403533, "grad_norm": 0.15375620126724243, "learning_rate": 3.824549237170597e-06, "loss": 0.0032, "step": 8915 }, { "epoch": 6.176653966054728, "grad_norm": 0.1635560542345047, "learning_rate": 3.823855755894591e-06, "loss": 0.003, "step": 8916 }, { "epoch": 6.177346726705923, "grad_norm": 0.5810548067092896, "learning_rate": 3.823162274618585e-06, "loss": 0.0041, "step": 8917 }, { "epoch": 6.178039487357118, "grad_norm": 0.21411103010177612, "learning_rate": 3.82246879334258e-06, "loss": 0.0034, "step": 8918 }, { "epoch": 6.1787322480083136, "grad_norm": 0.17415635287761688, "learning_rate": 3.821775312066575e-06, "loss": 0.0026, "step": 8919 }, { "epoch": 6.179425008659508, "grad_norm": 0.17464856803417206, "learning_rate": 3.821081830790569e-06, "loss": 0.0024, "step": 8920 }, { "epoch": 6.180117769310703, "grad_norm": 0.1878119260072708, "learning_rate": 3.820388349514563e-06, "loss": 0.0034, "step": 8921 }, { "epoch": 6.1808105299618985, "grad_norm": 0.2702119052410126, "learning_rate": 3.819694868238558e-06, "loss": 0.004, "step": 8922 }, { "epoch": 6.181503290613093, "grad_norm": 0.2339838743209839, "learning_rate": 3.819001386962553e-06, "loss": 0.0031, "step": 8923 }, { "epoch": 6.182196051264288, "grad_norm": 0.18894006311893463, "learning_rate": 3.818307905686547e-06, "loss": 0.0038, "step": 8924 }, { "epoch": 6.1828888119154835, "grad_norm": 0.13782832026481628, "learning_rate": 3.817614424410541e-06, "loss": 0.0031, "step": 8925 }, { "epoch": 6.183581572566678, "grad_norm": 0.19864635169506073, "learning_rate": 3.816920943134535e-06, "loss": 0.0045, "step": 8926 }, { "epoch": 6.184274333217873, "grad_norm": 1.2593319416046143, "learning_rate": 3.81622746185853e-06, "loss": 0.006, "step": 8927 }, { "epoch": 6.184967093869068, "grad_norm": 0.1914646029472351, "learning_rate": 3.815533980582525e-06, "loss": 0.0035, "step": 8928 }, { "epoch": 6.185659854520264, "grad_norm": 0.20021909475326538, "learning_rate": 3.814840499306519e-06, "loss": 0.003, "step": 8929 }, { "epoch": 6.186352615171458, "grad_norm": 0.3417031764984131, "learning_rate": 3.8141470180305136e-06, "loss": 0.0041, "step": 8930 }, { "epoch": 6.187045375822653, "grad_norm": 0.13619321584701538, "learning_rate": 3.8134535367545077e-06, "loss": 0.0027, "step": 8931 }, { "epoch": 6.187738136473849, "grad_norm": 0.2461363822221756, "learning_rate": 3.8127600554785022e-06, "loss": 0.0028, "step": 8932 }, { "epoch": 6.188430897125043, "grad_norm": 0.10541187971830368, "learning_rate": 3.812066574202497e-06, "loss": 0.002, "step": 8933 }, { "epoch": 6.189123657776238, "grad_norm": 0.1136985495686531, "learning_rate": 3.8113730929264913e-06, "loss": 0.0027, "step": 8934 }, { "epoch": 6.189816418427434, "grad_norm": 0.31341642141342163, "learning_rate": 3.8106796116504858e-06, "loss": 0.004, "step": 8935 }, { "epoch": 6.190509179078628, "grad_norm": 0.23905427753925323, "learning_rate": 3.80998613037448e-06, "loss": 0.0039, "step": 8936 }, { "epoch": 6.191201939729823, "grad_norm": 0.28960779309272766, "learning_rate": 3.8092926490984744e-06, "loss": 0.0045, "step": 8937 }, { "epoch": 6.1918947003810185, "grad_norm": 0.1412201076745987, "learning_rate": 3.8085991678224693e-06, "loss": 0.0028, "step": 8938 }, { "epoch": 6.192587461032213, "grad_norm": 0.13628625869750977, "learning_rate": 3.807905686546464e-06, "loss": 0.0025, "step": 8939 }, { "epoch": 6.193280221683408, "grad_norm": 0.1394803524017334, "learning_rate": 3.807212205270458e-06, "loss": 0.0027, "step": 8940 }, { "epoch": 6.1939729823346035, "grad_norm": 0.35515934228897095, "learning_rate": 3.8065187239944524e-06, "loss": 0.0035, "step": 8941 }, { "epoch": 6.194665742985799, "grad_norm": 0.5569301843643188, "learning_rate": 3.8058252427184465e-06, "loss": 0.0041, "step": 8942 }, { "epoch": 6.195358503636993, "grad_norm": 0.3013700544834137, "learning_rate": 3.8051317614424414e-06, "loss": 0.0033, "step": 8943 }, { "epoch": 6.196051264288188, "grad_norm": 0.25852084159851074, "learning_rate": 3.804438280166436e-06, "loss": 0.0037, "step": 8944 }, { "epoch": 6.196744024939384, "grad_norm": 0.3149358928203583, "learning_rate": 3.8037447988904304e-06, "loss": 0.0043, "step": 8945 }, { "epoch": 6.197436785590578, "grad_norm": 0.5216568112373352, "learning_rate": 3.8030513176144245e-06, "loss": 0.0046, "step": 8946 }, { "epoch": 6.198129546241773, "grad_norm": 0.4101194739341736, "learning_rate": 3.802357836338419e-06, "loss": 0.0041, "step": 8947 }, { "epoch": 6.198822306892969, "grad_norm": 0.2276059091091156, "learning_rate": 3.801664355062414e-06, "loss": 0.0038, "step": 8948 }, { "epoch": 6.199515067544164, "grad_norm": 0.37515920400619507, "learning_rate": 3.800970873786408e-06, "loss": 0.0043, "step": 8949 }, { "epoch": 6.200207828195358, "grad_norm": 0.3137504458427429, "learning_rate": 3.8002773925104026e-06, "loss": 0.0032, "step": 8950 }, { "epoch": 6.200900588846554, "grad_norm": 0.21343262493610382, "learning_rate": 3.7995839112343967e-06, "loss": 0.0037, "step": 8951 }, { "epoch": 6.201593349497749, "grad_norm": 0.17816847562789917, "learning_rate": 3.798890429958391e-06, "loss": 0.0037, "step": 8952 }, { "epoch": 6.202286110148943, "grad_norm": 0.386277973651886, "learning_rate": 3.798196948682386e-06, "loss": 0.0051, "step": 8953 }, { "epoch": 6.2029788708001385, "grad_norm": 0.3896544277667999, "learning_rate": 3.7975034674063806e-06, "loss": 0.0054, "step": 8954 }, { "epoch": 6.203671631451334, "grad_norm": 0.2156912237405777, "learning_rate": 3.7968099861303747e-06, "loss": 0.0029, "step": 8955 }, { "epoch": 6.204364392102528, "grad_norm": 0.17616605758666992, "learning_rate": 3.796116504854369e-06, "loss": 0.0028, "step": 8956 }, { "epoch": 6.2050571527537235, "grad_norm": 0.20233075320720673, "learning_rate": 3.7954230235783633e-06, "loss": 0.003, "step": 8957 }, { "epoch": 6.205749913404919, "grad_norm": 0.44021105766296387, "learning_rate": 3.7947295423023582e-06, "loss": 0.0054, "step": 8958 }, { "epoch": 6.206442674056113, "grad_norm": 0.2861934006214142, "learning_rate": 3.7940360610263527e-06, "loss": 0.0032, "step": 8959 }, { "epoch": 6.207135434707308, "grad_norm": 0.39539748430252075, "learning_rate": 3.793342579750347e-06, "loss": 0.0044, "step": 8960 }, { "epoch": 6.207828195358504, "grad_norm": 0.2111402004957199, "learning_rate": 3.7926490984743413e-06, "loss": 0.0041, "step": 8961 }, { "epoch": 6.208520956009699, "grad_norm": 0.22310729324817657, "learning_rate": 3.791955617198336e-06, "loss": 0.0048, "step": 8962 }, { "epoch": 6.209213716660893, "grad_norm": 0.18399444222450256, "learning_rate": 3.7912621359223308e-06, "loss": 0.0035, "step": 8963 }, { "epoch": 6.209906477312089, "grad_norm": 0.18670117855072021, "learning_rate": 3.790568654646325e-06, "loss": 0.0042, "step": 8964 }, { "epoch": 6.210599237963284, "grad_norm": 0.5374297499656677, "learning_rate": 3.7898751733703194e-06, "loss": 0.0046, "step": 8965 }, { "epoch": 6.211291998614478, "grad_norm": 0.24439114332199097, "learning_rate": 3.7891816920943135e-06, "loss": 0.0051, "step": 8966 }, { "epoch": 6.211984759265674, "grad_norm": 0.2810538411140442, "learning_rate": 3.788488210818308e-06, "loss": 0.0049, "step": 8967 }, { "epoch": 6.212677519916869, "grad_norm": 0.2148081213235855, "learning_rate": 3.787794729542303e-06, "loss": 0.0033, "step": 8968 }, { "epoch": 6.213370280568064, "grad_norm": 0.13280628621578217, "learning_rate": 3.7871012482662974e-06, "loss": 0.0024, "step": 8969 }, { "epoch": 6.214063041219259, "grad_norm": 0.152400404214859, "learning_rate": 3.7864077669902915e-06, "loss": 0.0027, "step": 8970 }, { "epoch": 6.214755801870454, "grad_norm": 0.14377716183662415, "learning_rate": 3.785714285714286e-06, "loss": 0.0025, "step": 8971 }, { "epoch": 6.215448562521649, "grad_norm": 0.17485810816287994, "learning_rate": 3.78502080443828e-06, "loss": 0.0029, "step": 8972 }, { "epoch": 6.2161413231728435, "grad_norm": 0.19294202327728271, "learning_rate": 3.784327323162275e-06, "loss": 0.0038, "step": 8973 }, { "epoch": 6.216834083824039, "grad_norm": 0.23651130497455597, "learning_rate": 3.7836338418862695e-06, "loss": 0.003, "step": 8974 }, { "epoch": 6.217526844475234, "grad_norm": 0.22184260189533234, "learning_rate": 3.7829403606102636e-06, "loss": 0.0045, "step": 8975 }, { "epoch": 6.2182196051264285, "grad_norm": 0.24723675847053528, "learning_rate": 3.782246879334258e-06, "loss": 0.0033, "step": 8976 }, { "epoch": 6.218912365777624, "grad_norm": 0.20020835101604462, "learning_rate": 3.7815533980582526e-06, "loss": 0.0054, "step": 8977 }, { "epoch": 6.219605126428819, "grad_norm": 0.25527140498161316, "learning_rate": 3.7808599167822476e-06, "loss": 0.0037, "step": 8978 }, { "epoch": 6.220297887080013, "grad_norm": 0.354015976190567, "learning_rate": 3.7801664355062417e-06, "loss": 0.0029, "step": 8979 }, { "epoch": 6.220990647731209, "grad_norm": 0.23956342041492462, "learning_rate": 3.779472954230236e-06, "loss": 0.0043, "step": 8980 }, { "epoch": 6.221683408382404, "grad_norm": 0.1630607545375824, "learning_rate": 3.7787794729542303e-06, "loss": 0.0029, "step": 8981 }, { "epoch": 6.222376169033599, "grad_norm": 0.2843337059020996, "learning_rate": 3.7780859916782248e-06, "loss": 0.0064, "step": 8982 }, { "epoch": 6.223068929684794, "grad_norm": 0.28237929940223694, "learning_rate": 3.7773925104022197e-06, "loss": 0.0042, "step": 8983 }, { "epoch": 6.223761690335989, "grad_norm": 0.409702330827713, "learning_rate": 3.776699029126214e-06, "loss": 0.0052, "step": 8984 }, { "epoch": 6.224454450987184, "grad_norm": 0.16289186477661133, "learning_rate": 3.7760055478502083e-06, "loss": 0.0027, "step": 8985 }, { "epoch": 6.225147211638379, "grad_norm": 0.20507176220417023, "learning_rate": 3.775312066574203e-06, "loss": 0.0028, "step": 8986 }, { "epoch": 6.225839972289574, "grad_norm": 0.20279560983181, "learning_rate": 3.774618585298197e-06, "loss": 0.0028, "step": 8987 }, { "epoch": 6.226532732940769, "grad_norm": 0.1872999668121338, "learning_rate": 3.773925104022192e-06, "loss": 0.0036, "step": 8988 }, { "epoch": 6.227225493591964, "grad_norm": 0.147945836186409, "learning_rate": 3.7732316227461863e-06, "loss": 0.0028, "step": 8989 }, { "epoch": 6.227918254243159, "grad_norm": 0.4329070448875427, "learning_rate": 3.7725381414701804e-06, "loss": 0.0034, "step": 8990 }, { "epoch": 6.228611014894354, "grad_norm": 0.21519504487514496, "learning_rate": 3.771844660194175e-06, "loss": 0.0036, "step": 8991 }, { "epoch": 6.229303775545549, "grad_norm": 0.5421343445777893, "learning_rate": 3.7711511789181694e-06, "loss": 0.0045, "step": 8992 }, { "epoch": 6.229996536196744, "grad_norm": 0.2251807004213333, "learning_rate": 3.7704576976421644e-06, "loss": 0.0034, "step": 8993 }, { "epoch": 6.230689296847939, "grad_norm": 0.23039962351322174, "learning_rate": 3.7697642163661585e-06, "loss": 0.0026, "step": 8994 }, { "epoch": 6.231382057499134, "grad_norm": 0.2990110218524933, "learning_rate": 3.769070735090153e-06, "loss": 0.0033, "step": 8995 }, { "epoch": 6.232074818150329, "grad_norm": 0.5308482050895691, "learning_rate": 3.768377253814147e-06, "loss": 0.0049, "step": 8996 }, { "epoch": 6.232767578801524, "grad_norm": 0.20214372873306274, "learning_rate": 3.7676837725381416e-06, "loss": 0.0023, "step": 8997 }, { "epoch": 6.233460339452719, "grad_norm": 0.175472691655159, "learning_rate": 3.7669902912621365e-06, "loss": 0.0025, "step": 8998 }, { "epoch": 6.234153100103914, "grad_norm": 0.22035038471221924, "learning_rate": 3.7662968099861306e-06, "loss": 0.0034, "step": 8999 }, { "epoch": 6.234845860755109, "grad_norm": 0.3284253478050232, "learning_rate": 3.765603328710125e-06, "loss": 0.0037, "step": 9000 }, { "epoch": 6.235538621406304, "grad_norm": 0.20636343955993652, "learning_rate": 3.7649098474341196e-06, "loss": 0.0028, "step": 9001 }, { "epoch": 6.2362313820574995, "grad_norm": 0.40715131163597107, "learning_rate": 3.7642163661581137e-06, "loss": 0.0054, "step": 9002 }, { "epoch": 6.236924142708694, "grad_norm": 0.428727388381958, "learning_rate": 3.7635228848821086e-06, "loss": 0.0037, "step": 9003 }, { "epoch": 6.237616903359889, "grad_norm": 0.28343698382377625, "learning_rate": 3.762829403606103e-06, "loss": 0.0048, "step": 9004 }, { "epoch": 6.238309664011084, "grad_norm": 0.4409272372722626, "learning_rate": 3.7621359223300972e-06, "loss": 0.0047, "step": 9005 }, { "epoch": 6.239002424662279, "grad_norm": 0.167841374874115, "learning_rate": 3.7614424410540917e-06, "loss": 0.003, "step": 9006 }, { "epoch": 6.239695185313474, "grad_norm": 0.2910407781600952, "learning_rate": 3.7607489597780863e-06, "loss": 0.0036, "step": 9007 }, { "epoch": 6.240387945964669, "grad_norm": 0.23353824019432068, "learning_rate": 3.7600554785020808e-06, "loss": 0.0033, "step": 9008 }, { "epoch": 6.241080706615865, "grad_norm": 0.17986933887004852, "learning_rate": 3.7593619972260753e-06, "loss": 0.0041, "step": 9009 }, { "epoch": 6.241773467267059, "grad_norm": 0.27407851815223694, "learning_rate": 3.7586685159500698e-06, "loss": 0.0045, "step": 9010 }, { "epoch": 6.242466227918254, "grad_norm": 0.42741575837135315, "learning_rate": 3.757975034674064e-06, "loss": 0.0036, "step": 9011 }, { "epoch": 6.24315898856945, "grad_norm": 0.17926374077796936, "learning_rate": 3.7572815533980584e-06, "loss": 0.0026, "step": 9012 }, { "epoch": 6.243851749220644, "grad_norm": 0.1855120211839676, "learning_rate": 3.7565880721220533e-06, "loss": 0.0033, "step": 9013 }, { "epoch": 6.244544509871839, "grad_norm": 0.16290566325187683, "learning_rate": 3.7558945908460474e-06, "loss": 0.0027, "step": 9014 }, { "epoch": 6.245237270523035, "grad_norm": 0.20021256804466248, "learning_rate": 3.755201109570042e-06, "loss": 0.0032, "step": 9015 }, { "epoch": 6.245930031174229, "grad_norm": 0.1684054583311081, "learning_rate": 3.7545076282940364e-06, "loss": 0.0035, "step": 9016 }, { "epoch": 6.246622791825424, "grad_norm": 0.34178709983825684, "learning_rate": 3.7538141470180305e-06, "loss": 0.0046, "step": 9017 }, { "epoch": 6.2473155524766195, "grad_norm": 0.19220145046710968, "learning_rate": 3.7531206657420254e-06, "loss": 0.0034, "step": 9018 }, { "epoch": 6.248008313127814, "grad_norm": 0.20446428656578064, "learning_rate": 3.75242718446602e-06, "loss": 0.0035, "step": 9019 }, { "epoch": 6.248701073779009, "grad_norm": 0.3254857361316681, "learning_rate": 3.751733703190014e-06, "loss": 0.0051, "step": 9020 }, { "epoch": 6.2493938344302045, "grad_norm": 0.2723582684993744, "learning_rate": 3.7510402219140085e-06, "loss": 0.0058, "step": 9021 }, { "epoch": 6.2500865950814, "grad_norm": 0.15010446310043335, "learning_rate": 3.750346740638003e-06, "loss": 0.0032, "step": 9022 }, { "epoch": 6.250779355732594, "grad_norm": 0.1501779556274414, "learning_rate": 3.7496532593619976e-06, "loss": 0.0024, "step": 9023 }, { "epoch": 6.251472116383789, "grad_norm": 0.17819537222385406, "learning_rate": 3.748959778085992e-06, "loss": 0.0032, "step": 9024 }, { "epoch": 6.252164877034985, "grad_norm": 0.28279098868370056, "learning_rate": 3.7482662968099866e-06, "loss": 0.0039, "step": 9025 }, { "epoch": 6.252857637686179, "grad_norm": 0.3389202952384949, "learning_rate": 3.7475728155339807e-06, "loss": 0.0036, "step": 9026 }, { "epoch": 6.253550398337374, "grad_norm": 0.21087242662906647, "learning_rate": 3.746879334257975e-06, "loss": 0.0037, "step": 9027 }, { "epoch": 6.25424315898857, "grad_norm": 0.3403334617614746, "learning_rate": 3.74618585298197e-06, "loss": 0.004, "step": 9028 }, { "epoch": 6.254935919639765, "grad_norm": 0.28383970260620117, "learning_rate": 3.745492371705964e-06, "loss": 0.0035, "step": 9029 }, { "epoch": 6.255628680290959, "grad_norm": 0.49126681685447693, "learning_rate": 3.7447988904299587e-06, "loss": 0.0046, "step": 9030 }, { "epoch": 6.256321440942155, "grad_norm": 0.3545149266719818, "learning_rate": 3.7441054091539532e-06, "loss": 0.0038, "step": 9031 }, { "epoch": 6.25701420159335, "grad_norm": 0.36593419313430786, "learning_rate": 3.7434119278779473e-06, "loss": 0.0044, "step": 9032 }, { "epoch": 6.257706962244544, "grad_norm": 0.15204544365406036, "learning_rate": 3.7427184466019422e-06, "loss": 0.0033, "step": 9033 }, { "epoch": 6.2583997228957395, "grad_norm": 0.19848747551441193, "learning_rate": 3.7420249653259367e-06, "loss": 0.0027, "step": 9034 }, { "epoch": 6.259092483546935, "grad_norm": 0.26260146498680115, "learning_rate": 3.741331484049931e-06, "loss": 0.0028, "step": 9035 }, { "epoch": 6.259785244198129, "grad_norm": 0.19948506355285645, "learning_rate": 3.7406380027739253e-06, "loss": 0.0033, "step": 9036 }, { "epoch": 6.2604780048493245, "grad_norm": 0.17205795645713806, "learning_rate": 3.73994452149792e-06, "loss": 0.0031, "step": 9037 }, { "epoch": 6.26117076550052, "grad_norm": 0.4595890939235687, "learning_rate": 3.7392510402219144e-06, "loss": 0.0046, "step": 9038 }, { "epoch": 6.261863526151714, "grad_norm": 0.25202450156211853, "learning_rate": 3.738557558945909e-06, "loss": 0.0054, "step": 9039 }, { "epoch": 6.262556286802909, "grad_norm": 0.2603699862957001, "learning_rate": 3.7378640776699034e-06, "loss": 0.0036, "step": 9040 }, { "epoch": 6.263249047454105, "grad_norm": 0.19549359381198883, "learning_rate": 3.7371705963938975e-06, "loss": 0.0041, "step": 9041 }, { "epoch": 6.2639418081053, "grad_norm": 0.19719092547893524, "learning_rate": 3.736477115117892e-06, "loss": 0.0046, "step": 9042 }, { "epoch": 6.264634568756494, "grad_norm": 0.2393694818019867, "learning_rate": 3.735783633841887e-06, "loss": 0.0029, "step": 9043 }, { "epoch": 6.26532732940769, "grad_norm": 0.2887903153896332, "learning_rate": 3.735090152565881e-06, "loss": 0.0049, "step": 9044 }, { "epoch": 6.266020090058885, "grad_norm": 0.39004719257354736, "learning_rate": 3.7343966712898755e-06, "loss": 0.0063, "step": 9045 }, { "epoch": 6.266712850710079, "grad_norm": 0.3693449795246124, "learning_rate": 3.73370319001387e-06, "loss": 0.0035, "step": 9046 }, { "epoch": 6.267405611361275, "grad_norm": 0.34467846155166626, "learning_rate": 3.733009708737864e-06, "loss": 0.0039, "step": 9047 }, { "epoch": 6.26809837201247, "grad_norm": 0.3364553451538086, "learning_rate": 3.732316227461859e-06, "loss": 0.0039, "step": 9048 }, { "epoch": 6.268791132663665, "grad_norm": 0.23317886888980865, "learning_rate": 3.7316227461858535e-06, "loss": 0.0035, "step": 9049 }, { "epoch": 6.26948389331486, "grad_norm": 0.42350003123283386, "learning_rate": 3.7309292649098476e-06, "loss": 0.0063, "step": 9050 }, { "epoch": 6.270176653966055, "grad_norm": 0.3047276437282562, "learning_rate": 3.730235783633842e-06, "loss": 0.0047, "step": 9051 }, { "epoch": 6.27086941461725, "grad_norm": 0.10471628606319427, "learning_rate": 3.7295423023578362e-06, "loss": 0.0023, "step": 9052 }, { "epoch": 6.2715621752684445, "grad_norm": 0.18865561485290527, "learning_rate": 3.728848821081831e-06, "loss": 0.0044, "step": 9053 }, { "epoch": 6.27225493591964, "grad_norm": 0.41251006722450256, "learning_rate": 3.7281553398058257e-06, "loss": 0.0037, "step": 9054 }, { "epoch": 6.272947696570835, "grad_norm": 0.3191385567188263, "learning_rate": 3.72746185852982e-06, "loss": 0.0038, "step": 9055 }, { "epoch": 6.2736404572220295, "grad_norm": 0.20665396749973297, "learning_rate": 3.7267683772538143e-06, "loss": 0.0043, "step": 9056 }, { "epoch": 6.274333217873225, "grad_norm": 0.19908887147903442, "learning_rate": 3.7260748959778088e-06, "loss": 0.0038, "step": 9057 }, { "epoch": 6.27502597852442, "grad_norm": 0.17986100912094116, "learning_rate": 3.7253814147018037e-06, "loss": 0.0035, "step": 9058 }, { "epoch": 6.275718739175614, "grad_norm": 0.280498743057251, "learning_rate": 3.724687933425798e-06, "loss": 0.0049, "step": 9059 }, { "epoch": 6.27641149982681, "grad_norm": 0.13933268189430237, "learning_rate": 3.7239944521497923e-06, "loss": 0.0028, "step": 9060 }, { "epoch": 6.277104260478005, "grad_norm": 0.15762725472450256, "learning_rate": 3.723300970873787e-06, "loss": 0.0027, "step": 9061 }, { "epoch": 6.2777970211292, "grad_norm": 0.25061821937561035, "learning_rate": 3.722607489597781e-06, "loss": 0.0029, "step": 9062 }, { "epoch": 6.278489781780395, "grad_norm": 0.2139478325843811, "learning_rate": 3.721914008321776e-06, "loss": 0.0048, "step": 9063 }, { "epoch": 6.27918254243159, "grad_norm": 0.2980344295501709, "learning_rate": 3.7212205270457703e-06, "loss": 0.0043, "step": 9064 }, { "epoch": 6.279875303082785, "grad_norm": 0.17846964299678802, "learning_rate": 3.7205270457697644e-06, "loss": 0.0028, "step": 9065 }, { "epoch": 6.28056806373398, "grad_norm": 0.3059777319431305, "learning_rate": 3.719833564493759e-06, "loss": 0.0046, "step": 9066 }, { "epoch": 6.281260824385175, "grad_norm": 0.3137226700782776, "learning_rate": 3.719140083217753e-06, "loss": 0.0063, "step": 9067 }, { "epoch": 6.28195358503637, "grad_norm": 0.1962714046239853, "learning_rate": 3.718446601941748e-06, "loss": 0.0033, "step": 9068 }, { "epoch": 6.282646345687565, "grad_norm": 0.11992142349481583, "learning_rate": 3.7177531206657425e-06, "loss": 0.0024, "step": 9069 }, { "epoch": 6.28333910633876, "grad_norm": 0.1776193380355835, "learning_rate": 3.717059639389737e-06, "loss": 0.0031, "step": 9070 }, { "epoch": 6.284031866989955, "grad_norm": 0.25049513578414917, "learning_rate": 3.716366158113731e-06, "loss": 0.0043, "step": 9071 }, { "epoch": 6.28472462764115, "grad_norm": 0.33176901936531067, "learning_rate": 3.7156726768377256e-06, "loss": 0.0039, "step": 9072 }, { "epoch": 6.285417388292345, "grad_norm": 0.37309005856513977, "learning_rate": 3.7149791955617205e-06, "loss": 0.005, "step": 9073 }, { "epoch": 6.28611014894354, "grad_norm": 0.18733099102973938, "learning_rate": 3.7142857142857146e-06, "loss": 0.0029, "step": 9074 }, { "epoch": 6.286802909594735, "grad_norm": 0.3493751585483551, "learning_rate": 3.713592233009709e-06, "loss": 0.0033, "step": 9075 }, { "epoch": 6.28749567024593, "grad_norm": 0.1516132354736328, "learning_rate": 3.712898751733703e-06, "loss": 0.0029, "step": 9076 }, { "epoch": 6.288188430897125, "grad_norm": 0.24067988991737366, "learning_rate": 3.7122052704576977e-06, "loss": 0.0038, "step": 9077 }, { "epoch": 6.28888119154832, "grad_norm": 0.15279389917850494, "learning_rate": 3.7115117891816926e-06, "loss": 0.0038, "step": 9078 }, { "epoch": 6.289573952199515, "grad_norm": 0.1782047152519226, "learning_rate": 3.710818307905687e-06, "loss": 0.0029, "step": 9079 }, { "epoch": 6.29026671285071, "grad_norm": 0.17505787312984467, "learning_rate": 3.7101248266296812e-06, "loss": 0.0032, "step": 9080 }, { "epoch": 6.290959473501905, "grad_norm": 0.22981473803520203, "learning_rate": 3.7094313453536757e-06, "loss": 0.0033, "step": 9081 }, { "epoch": 6.2916522341531005, "grad_norm": 0.16924026608467102, "learning_rate": 3.70873786407767e-06, "loss": 0.0029, "step": 9082 }, { "epoch": 6.292344994804295, "grad_norm": 0.18179747462272644, "learning_rate": 3.7080443828016648e-06, "loss": 0.0029, "step": 9083 }, { "epoch": 6.29303775545549, "grad_norm": 0.21053244173526764, "learning_rate": 3.7073509015256593e-06, "loss": 0.0031, "step": 9084 }, { "epoch": 6.293730516106685, "grad_norm": 0.19924941658973694, "learning_rate": 3.7066574202496538e-06, "loss": 0.0039, "step": 9085 }, { "epoch": 6.29442327675788, "grad_norm": 0.2505203187465668, "learning_rate": 3.705963938973648e-06, "loss": 0.0034, "step": 9086 }, { "epoch": 6.295116037409075, "grad_norm": 0.20534180104732513, "learning_rate": 3.7052704576976424e-06, "loss": 0.0036, "step": 9087 }, { "epoch": 6.29580879806027, "grad_norm": 0.19099000096321106, "learning_rate": 3.7045769764216373e-06, "loss": 0.0027, "step": 9088 }, { "epoch": 6.296501558711465, "grad_norm": 0.1680712103843689, "learning_rate": 3.7038834951456314e-06, "loss": 0.0037, "step": 9089 }, { "epoch": 6.29719431936266, "grad_norm": 0.3262440860271454, "learning_rate": 3.703190013869626e-06, "loss": 0.0043, "step": 9090 }, { "epoch": 6.297887080013855, "grad_norm": 0.21914781630039215, "learning_rate": 3.70249653259362e-06, "loss": 0.0034, "step": 9091 }, { "epoch": 6.298579840665051, "grad_norm": 0.19973184168338776, "learning_rate": 3.7018030513176145e-06, "loss": 0.004, "step": 9092 }, { "epoch": 6.299272601316245, "grad_norm": 0.7196505069732666, "learning_rate": 3.7011095700416094e-06, "loss": 0.0044, "step": 9093 }, { "epoch": 6.29996536196744, "grad_norm": 0.15338726341724396, "learning_rate": 3.700416088765604e-06, "loss": 0.0031, "step": 9094 }, { "epoch": 6.300658122618636, "grad_norm": 0.5259151458740234, "learning_rate": 3.699722607489598e-06, "loss": 0.0036, "step": 9095 }, { "epoch": 6.30135088326983, "grad_norm": 0.5824711918830872, "learning_rate": 3.6990291262135925e-06, "loss": 0.0062, "step": 9096 }, { "epoch": 6.302043643921025, "grad_norm": 0.2186335027217865, "learning_rate": 3.6983356449375866e-06, "loss": 0.0028, "step": 9097 }, { "epoch": 6.3027364045722205, "grad_norm": 0.26481255888938904, "learning_rate": 3.6976421636615816e-06, "loss": 0.0036, "step": 9098 }, { "epoch": 6.303429165223415, "grad_norm": 0.1980435848236084, "learning_rate": 3.696948682385576e-06, "loss": 0.0039, "step": 9099 }, { "epoch": 6.30412192587461, "grad_norm": 0.3058741092681885, "learning_rate": 3.69625520110957e-06, "loss": 0.0045, "step": 9100 }, { "epoch": 6.3048146865258055, "grad_norm": 0.1924455761909485, "learning_rate": 3.6955617198335647e-06, "loss": 0.0039, "step": 9101 }, { "epoch": 6.305507447177001, "grad_norm": 0.45326587557792664, "learning_rate": 3.694868238557559e-06, "loss": 0.0062, "step": 9102 }, { "epoch": 6.306200207828195, "grad_norm": 0.18722014129161835, "learning_rate": 3.694174757281554e-06, "loss": 0.0037, "step": 9103 }, { "epoch": 6.30689296847939, "grad_norm": 0.17146499454975128, "learning_rate": 3.693481276005548e-06, "loss": 0.0026, "step": 9104 }, { "epoch": 6.307585729130586, "grad_norm": 0.31631070375442505, "learning_rate": 3.6927877947295427e-06, "loss": 0.0036, "step": 9105 }, { "epoch": 6.30827848978178, "grad_norm": 0.15640586614608765, "learning_rate": 3.692094313453537e-06, "loss": 0.0029, "step": 9106 }, { "epoch": 6.308971250432975, "grad_norm": 0.23377051949501038, "learning_rate": 3.6914008321775313e-06, "loss": 0.005, "step": 9107 }, { "epoch": 6.309664011084171, "grad_norm": 0.19398950040340424, "learning_rate": 3.6907073509015262e-06, "loss": 0.0029, "step": 9108 }, { "epoch": 6.310356771735365, "grad_norm": 0.3420952558517456, "learning_rate": 3.6900138696255208e-06, "loss": 0.0034, "step": 9109 }, { "epoch": 6.31104953238656, "grad_norm": 0.29911771416664124, "learning_rate": 3.689320388349515e-06, "loss": 0.0048, "step": 9110 }, { "epoch": 6.311742293037756, "grad_norm": 0.2814820408821106, "learning_rate": 3.6886269070735094e-06, "loss": 0.0061, "step": 9111 }, { "epoch": 6.312435053688951, "grad_norm": 0.22791898250579834, "learning_rate": 3.6879334257975034e-06, "loss": 0.0056, "step": 9112 }, { "epoch": 6.313127814340145, "grad_norm": 0.15516510605812073, "learning_rate": 3.6872399445214984e-06, "loss": 0.003, "step": 9113 }, { "epoch": 6.3138205749913405, "grad_norm": 0.1726953387260437, "learning_rate": 3.686546463245493e-06, "loss": 0.0027, "step": 9114 }, { "epoch": 6.314513335642536, "grad_norm": 0.13942740857601166, "learning_rate": 3.685852981969487e-06, "loss": 0.0027, "step": 9115 }, { "epoch": 6.31520609629373, "grad_norm": 0.37250882387161255, "learning_rate": 3.6851595006934815e-06, "loss": 0.0055, "step": 9116 }, { "epoch": 6.3158988569449255, "grad_norm": 0.44557350873947144, "learning_rate": 3.684466019417476e-06, "loss": 0.0039, "step": 9117 }, { "epoch": 6.316591617596121, "grad_norm": 0.2854665517807007, "learning_rate": 3.683772538141471e-06, "loss": 0.0043, "step": 9118 }, { "epoch": 6.317284378247315, "grad_norm": 0.4197855591773987, "learning_rate": 3.683079056865465e-06, "loss": 0.0032, "step": 9119 }, { "epoch": 6.31797713889851, "grad_norm": 0.2187676578760147, "learning_rate": 3.6823855755894595e-06, "loss": 0.0032, "step": 9120 }, { "epoch": 6.318669899549706, "grad_norm": 0.19195012748241425, "learning_rate": 3.6816920943134536e-06, "loss": 0.0029, "step": 9121 }, { "epoch": 6.319362660200901, "grad_norm": 0.2230643630027771, "learning_rate": 3.680998613037448e-06, "loss": 0.0039, "step": 9122 }, { "epoch": 6.320055420852095, "grad_norm": 0.21015210449695587, "learning_rate": 3.680305131761443e-06, "loss": 0.0047, "step": 9123 }, { "epoch": 6.320748181503291, "grad_norm": 0.1933145821094513, "learning_rate": 3.679611650485437e-06, "loss": 0.003, "step": 9124 }, { "epoch": 6.321440942154486, "grad_norm": 0.22975307703018188, "learning_rate": 3.6789181692094316e-06, "loss": 0.0038, "step": 9125 }, { "epoch": 6.32213370280568, "grad_norm": 0.21831192076206207, "learning_rate": 3.678224687933426e-06, "loss": 0.0036, "step": 9126 }, { "epoch": 6.322826463456876, "grad_norm": 0.2836846113204956, "learning_rate": 3.6775312066574202e-06, "loss": 0.0035, "step": 9127 }, { "epoch": 6.323519224108071, "grad_norm": 0.13700388371944427, "learning_rate": 3.676837725381415e-06, "loss": 0.003, "step": 9128 }, { "epoch": 6.324211984759265, "grad_norm": 0.20510956645011902, "learning_rate": 3.6761442441054097e-06, "loss": 0.0033, "step": 9129 }, { "epoch": 6.3249047454104605, "grad_norm": 0.1835632026195526, "learning_rate": 3.6754507628294038e-06, "loss": 0.0038, "step": 9130 }, { "epoch": 6.325597506061656, "grad_norm": 0.26688703894615173, "learning_rate": 3.6747572815533983e-06, "loss": 0.0035, "step": 9131 }, { "epoch": 6.326290266712851, "grad_norm": 0.21331727504730225, "learning_rate": 3.6740638002773928e-06, "loss": 0.0047, "step": 9132 }, { "epoch": 6.3269830273640455, "grad_norm": 0.1545047014951706, "learning_rate": 3.6733703190013877e-06, "loss": 0.0023, "step": 9133 }, { "epoch": 6.327675788015241, "grad_norm": 0.17784884572029114, "learning_rate": 3.672676837725382e-06, "loss": 0.0033, "step": 9134 }, { "epoch": 6.328368548666436, "grad_norm": 0.1269831359386444, "learning_rate": 3.6719833564493763e-06, "loss": 0.0028, "step": 9135 }, { "epoch": 6.3290613093176304, "grad_norm": 0.2010103464126587, "learning_rate": 3.6712898751733704e-06, "loss": 0.0034, "step": 9136 }, { "epoch": 6.329754069968826, "grad_norm": 0.38688895106315613, "learning_rate": 3.670596393897365e-06, "loss": 0.0051, "step": 9137 }, { "epoch": 6.330446830620021, "grad_norm": 0.2992427945137024, "learning_rate": 3.66990291262136e-06, "loss": 0.0037, "step": 9138 }, { "epoch": 6.331139591271215, "grad_norm": 0.19218286871910095, "learning_rate": 3.669209431345354e-06, "loss": 0.0034, "step": 9139 }, { "epoch": 6.331832351922411, "grad_norm": 0.27887171506881714, "learning_rate": 3.6685159500693484e-06, "loss": 0.0036, "step": 9140 }, { "epoch": 6.332525112573606, "grad_norm": 0.2271146923303604, "learning_rate": 3.667822468793343e-06, "loss": 0.0041, "step": 9141 }, { "epoch": 6.333217873224801, "grad_norm": 0.2160869836807251, "learning_rate": 3.667128987517337e-06, "loss": 0.0029, "step": 9142 }, { "epoch": 6.333910633875996, "grad_norm": 0.2749139964580536, "learning_rate": 3.666435506241332e-06, "loss": 0.004, "step": 9143 }, { "epoch": 6.334603394527191, "grad_norm": 0.24140483140945435, "learning_rate": 3.6657420249653265e-06, "loss": 0.0031, "step": 9144 }, { "epoch": 6.335296155178386, "grad_norm": 0.22241345047950745, "learning_rate": 3.6650485436893206e-06, "loss": 0.0038, "step": 9145 }, { "epoch": 6.335988915829581, "grad_norm": 0.33373576402664185, "learning_rate": 3.664355062413315e-06, "loss": 0.0044, "step": 9146 }, { "epoch": 6.336681676480776, "grad_norm": 0.45689862966537476, "learning_rate": 3.6636615811373096e-06, "loss": 0.0042, "step": 9147 }, { "epoch": 6.337374437131971, "grad_norm": 0.2773498594760895, "learning_rate": 3.662968099861304e-06, "loss": 0.0038, "step": 9148 }, { "epoch": 6.3380671977831655, "grad_norm": 0.151902437210083, "learning_rate": 3.6622746185852986e-06, "loss": 0.0028, "step": 9149 }, { "epoch": 6.338759958434361, "grad_norm": 0.30610865354537964, "learning_rate": 3.661581137309293e-06, "loss": 0.0057, "step": 9150 }, { "epoch": 6.339452719085556, "grad_norm": 0.3921205699443817, "learning_rate": 3.660887656033287e-06, "loss": 0.005, "step": 9151 }, { "epoch": 6.340145479736751, "grad_norm": 0.16900771856307983, "learning_rate": 3.6601941747572817e-06, "loss": 0.0033, "step": 9152 }, { "epoch": 6.340838240387946, "grad_norm": 0.17052240669727325, "learning_rate": 3.6595006934812766e-06, "loss": 0.0037, "step": 9153 }, { "epoch": 6.341531001039141, "grad_norm": 0.4854407012462616, "learning_rate": 3.6588072122052707e-06, "loss": 0.0045, "step": 9154 }, { "epoch": 6.342223761690336, "grad_norm": 0.26672014594078064, "learning_rate": 3.6581137309292652e-06, "loss": 0.0033, "step": 9155 }, { "epoch": 6.342916522341531, "grad_norm": 0.41833311319351196, "learning_rate": 3.6574202496532598e-06, "loss": 0.0051, "step": 9156 }, { "epoch": 6.343609282992726, "grad_norm": 0.27049949765205383, "learning_rate": 3.656726768377254e-06, "loss": 0.0048, "step": 9157 }, { "epoch": 6.344302043643921, "grad_norm": 0.41585874557495117, "learning_rate": 3.6560332871012488e-06, "loss": 0.0065, "step": 9158 }, { "epoch": 6.344994804295116, "grad_norm": 0.17797231674194336, "learning_rate": 3.6553398058252433e-06, "loss": 0.0036, "step": 9159 }, { "epoch": 6.345687564946311, "grad_norm": 0.16923600435256958, "learning_rate": 3.6546463245492374e-06, "loss": 0.0028, "step": 9160 }, { "epoch": 6.346380325597506, "grad_norm": 0.43312036991119385, "learning_rate": 3.653952843273232e-06, "loss": 0.0045, "step": 9161 }, { "epoch": 6.3470730862487015, "grad_norm": 0.1832997351884842, "learning_rate": 3.6532593619972264e-06, "loss": 0.0031, "step": 9162 }, { "epoch": 6.347765846899896, "grad_norm": 0.21749837696552277, "learning_rate": 3.652565880721221e-06, "loss": 0.0031, "step": 9163 }, { "epoch": 6.348458607551091, "grad_norm": 0.19867324829101562, "learning_rate": 3.6518723994452154e-06, "loss": 0.0033, "step": 9164 }, { "epoch": 6.349151368202286, "grad_norm": 0.20064476132392883, "learning_rate": 3.65117891816921e-06, "loss": 0.0035, "step": 9165 }, { "epoch": 6.349844128853481, "grad_norm": 0.39225077629089355, "learning_rate": 3.650485436893204e-06, "loss": 0.0058, "step": 9166 }, { "epoch": 6.350536889504676, "grad_norm": 0.18574102222919464, "learning_rate": 3.6497919556171985e-06, "loss": 0.0025, "step": 9167 }, { "epoch": 6.351229650155871, "grad_norm": 0.32468369603157043, "learning_rate": 3.6490984743411934e-06, "loss": 0.0043, "step": 9168 }, { "epoch": 6.351922410807066, "grad_norm": 0.18796774744987488, "learning_rate": 3.6484049930651875e-06, "loss": 0.0031, "step": 9169 }, { "epoch": 6.352615171458261, "grad_norm": 0.13876697421073914, "learning_rate": 3.647711511789182e-06, "loss": 0.0028, "step": 9170 }, { "epoch": 6.353307932109456, "grad_norm": 0.2559381425380707, "learning_rate": 3.6470180305131766e-06, "loss": 0.0039, "step": 9171 }, { "epoch": 6.354000692760652, "grad_norm": 0.10826985538005829, "learning_rate": 3.6463245492371706e-06, "loss": 0.0023, "step": 9172 }, { "epoch": 6.354693453411846, "grad_norm": 0.2699757218360901, "learning_rate": 3.6456310679611656e-06, "loss": 0.0053, "step": 9173 }, { "epoch": 6.355386214063041, "grad_norm": 0.29450279474258423, "learning_rate": 3.64493758668516e-06, "loss": 0.0033, "step": 9174 }, { "epoch": 6.3560789747142366, "grad_norm": 0.45780789852142334, "learning_rate": 3.644244105409154e-06, "loss": 0.0029, "step": 9175 }, { "epoch": 6.356771735365431, "grad_norm": 0.23914526402950287, "learning_rate": 3.6435506241331487e-06, "loss": 0.0043, "step": 9176 }, { "epoch": 6.357464496016626, "grad_norm": 0.23474185168743134, "learning_rate": 3.642857142857143e-06, "loss": 0.0041, "step": 9177 }, { "epoch": 6.3581572566678215, "grad_norm": 0.4006239175796509, "learning_rate": 3.6421636615811377e-06, "loss": 0.0041, "step": 9178 }, { "epoch": 6.358850017319016, "grad_norm": 0.31819018721580505, "learning_rate": 3.641470180305132e-06, "loss": 0.0038, "step": 9179 }, { "epoch": 6.359542777970211, "grad_norm": 0.18153499066829681, "learning_rate": 3.6407766990291267e-06, "loss": 0.0034, "step": 9180 }, { "epoch": 6.3602355386214064, "grad_norm": 0.32186275720596313, "learning_rate": 3.640083217753121e-06, "loss": 0.0036, "step": 9181 }, { "epoch": 6.360928299272602, "grad_norm": 0.16871248185634613, "learning_rate": 3.6393897364771153e-06, "loss": 0.0028, "step": 9182 }, { "epoch": 6.361621059923796, "grad_norm": 0.2546533942222595, "learning_rate": 3.6386962552011103e-06, "loss": 0.0033, "step": 9183 }, { "epoch": 6.362313820574991, "grad_norm": 0.30084696412086487, "learning_rate": 3.6380027739251043e-06, "loss": 0.0031, "step": 9184 }, { "epoch": 6.363006581226187, "grad_norm": 0.24564731121063232, "learning_rate": 3.637309292649099e-06, "loss": 0.0035, "step": 9185 }, { "epoch": 6.363699341877381, "grad_norm": 0.5032068490982056, "learning_rate": 3.6366158113730934e-06, "loss": 0.0037, "step": 9186 }, { "epoch": 6.364392102528576, "grad_norm": 0.5219812989234924, "learning_rate": 3.6359223300970874e-06, "loss": 0.0037, "step": 9187 }, { "epoch": 6.365084863179772, "grad_norm": 0.15681755542755127, "learning_rate": 3.6352288488210824e-06, "loss": 0.0026, "step": 9188 }, { "epoch": 6.365777623830966, "grad_norm": 0.16239261627197266, "learning_rate": 3.634535367545077e-06, "loss": 0.0032, "step": 9189 }, { "epoch": 6.366470384482161, "grad_norm": 0.24097208678722382, "learning_rate": 3.633841886269071e-06, "loss": 0.0033, "step": 9190 }, { "epoch": 6.367163145133357, "grad_norm": 0.18855173885822296, "learning_rate": 3.6331484049930655e-06, "loss": 0.0024, "step": 9191 }, { "epoch": 6.367855905784552, "grad_norm": 0.45409417152404785, "learning_rate": 3.6324549237170596e-06, "loss": 0.005, "step": 9192 }, { "epoch": 6.368548666435746, "grad_norm": 0.18834039568901062, "learning_rate": 3.6317614424410545e-06, "loss": 0.0031, "step": 9193 }, { "epoch": 6.3692414270869415, "grad_norm": 0.2133660614490509, "learning_rate": 3.631067961165049e-06, "loss": 0.0042, "step": 9194 }, { "epoch": 6.369934187738137, "grad_norm": 0.2551059424877167, "learning_rate": 3.6303744798890435e-06, "loss": 0.0042, "step": 9195 }, { "epoch": 6.370626948389331, "grad_norm": 0.19197168946266174, "learning_rate": 3.6296809986130376e-06, "loss": 0.0048, "step": 9196 }, { "epoch": 6.3713197090405265, "grad_norm": 0.2950360178947449, "learning_rate": 3.628987517337032e-06, "loss": 0.0047, "step": 9197 }, { "epoch": 6.372012469691722, "grad_norm": 0.48158374428749084, "learning_rate": 3.628294036061027e-06, "loss": 0.0061, "step": 9198 }, { "epoch": 6.372705230342916, "grad_norm": 0.34344348311424255, "learning_rate": 3.627600554785021e-06, "loss": 0.0035, "step": 9199 }, { "epoch": 6.373397990994111, "grad_norm": 0.3352544605731964, "learning_rate": 3.6269070735090156e-06, "loss": 0.004, "step": 9200 }, { "epoch": 6.374090751645307, "grad_norm": 0.28213703632354736, "learning_rate": 3.62621359223301e-06, "loss": 0.0038, "step": 9201 }, { "epoch": 6.374783512296502, "grad_norm": 0.25421786308288574, "learning_rate": 3.6255201109570042e-06, "loss": 0.0044, "step": 9202 }, { "epoch": 6.375476272947696, "grad_norm": 0.21430328488349915, "learning_rate": 3.6248266296809988e-06, "loss": 0.003, "step": 9203 }, { "epoch": 6.376169033598892, "grad_norm": 0.319079726934433, "learning_rate": 3.6241331484049937e-06, "loss": 0.0044, "step": 9204 }, { "epoch": 6.376861794250087, "grad_norm": 0.3768300414085388, "learning_rate": 3.6234396671289878e-06, "loss": 0.0034, "step": 9205 }, { "epoch": 6.377554554901281, "grad_norm": 0.2236616015434265, "learning_rate": 3.6227461858529823e-06, "loss": 0.0034, "step": 9206 }, { "epoch": 6.378247315552477, "grad_norm": 0.30210188031196594, "learning_rate": 3.6220527045769764e-06, "loss": 0.0039, "step": 9207 }, { "epoch": 6.378940076203672, "grad_norm": 0.32834911346435547, "learning_rate": 3.621359223300971e-06, "loss": 0.005, "step": 9208 }, { "epoch": 6.379632836854866, "grad_norm": 0.253227561712265, "learning_rate": 3.620665742024966e-06, "loss": 0.0038, "step": 9209 }, { "epoch": 6.3803255975060615, "grad_norm": 0.2744441330432892, "learning_rate": 3.6199722607489603e-06, "loss": 0.0044, "step": 9210 }, { "epoch": 6.381018358157257, "grad_norm": 0.278323769569397, "learning_rate": 3.6192787794729544e-06, "loss": 0.0034, "step": 9211 }, { "epoch": 6.381711118808452, "grad_norm": 0.14826999604701996, "learning_rate": 3.618585298196949e-06, "loss": 0.0025, "step": 9212 }, { "epoch": 6.3824038794596465, "grad_norm": 0.15909333527088165, "learning_rate": 3.617891816920943e-06, "loss": 0.0028, "step": 9213 }, { "epoch": 6.383096640110842, "grad_norm": 0.21053312718868256, "learning_rate": 3.617198335644938e-06, "loss": 0.0034, "step": 9214 }, { "epoch": 6.383789400762037, "grad_norm": 0.34448710083961487, "learning_rate": 3.6165048543689324e-06, "loss": 0.0037, "step": 9215 }, { "epoch": 6.384482161413231, "grad_norm": 0.2399231195449829, "learning_rate": 3.6158113730929265e-06, "loss": 0.0038, "step": 9216 }, { "epoch": 6.385174922064427, "grad_norm": 0.1950463205575943, "learning_rate": 3.615117891816921e-06, "loss": 0.0031, "step": 9217 }, { "epoch": 6.385867682715622, "grad_norm": 0.2300952523946762, "learning_rate": 3.6144244105409156e-06, "loss": 0.0029, "step": 9218 }, { "epoch": 6.386560443366816, "grad_norm": 0.2720438539981842, "learning_rate": 3.6137309292649105e-06, "loss": 0.0033, "step": 9219 }, { "epoch": 6.387253204018012, "grad_norm": 0.4348621964454651, "learning_rate": 3.6130374479889046e-06, "loss": 0.004, "step": 9220 }, { "epoch": 6.387945964669207, "grad_norm": 0.23593385517597198, "learning_rate": 3.612343966712899e-06, "loss": 0.006, "step": 9221 }, { "epoch": 6.388638725320401, "grad_norm": 0.4404006004333496, "learning_rate": 3.611650485436893e-06, "loss": 0.0039, "step": 9222 }, { "epoch": 6.389331485971597, "grad_norm": 0.3923046588897705, "learning_rate": 3.6109570041608877e-06, "loss": 0.0039, "step": 9223 }, { "epoch": 6.390024246622792, "grad_norm": 0.1145739033818245, "learning_rate": 3.6102635228848826e-06, "loss": 0.0023, "step": 9224 }, { "epoch": 6.390717007273987, "grad_norm": 0.24858969449996948, "learning_rate": 3.609570041608877e-06, "loss": 0.0025, "step": 9225 }, { "epoch": 6.391409767925182, "grad_norm": 0.20601050555706024, "learning_rate": 3.608876560332871e-06, "loss": 0.0052, "step": 9226 }, { "epoch": 6.392102528576377, "grad_norm": 0.4553786814212799, "learning_rate": 3.6081830790568657e-06, "loss": 0.0047, "step": 9227 }, { "epoch": 6.392795289227572, "grad_norm": 0.30672401189804077, "learning_rate": 3.60748959778086e-06, "loss": 0.004, "step": 9228 }, { "epoch": 6.3934880498787665, "grad_norm": 0.3431636095046997, "learning_rate": 3.6067961165048547e-06, "loss": 0.0036, "step": 9229 }, { "epoch": 6.394180810529962, "grad_norm": 0.37345394492149353, "learning_rate": 3.6061026352288493e-06, "loss": 0.011, "step": 9230 }, { "epoch": 6.394873571181157, "grad_norm": 0.506641685962677, "learning_rate": 3.6054091539528433e-06, "loss": 0.0032, "step": 9231 }, { "epoch": 6.395566331832352, "grad_norm": 0.3148530125617981, "learning_rate": 3.604715672676838e-06, "loss": 0.0036, "step": 9232 }, { "epoch": 6.396259092483547, "grad_norm": 0.1761692464351654, "learning_rate": 3.6040221914008324e-06, "loss": 0.0029, "step": 9233 }, { "epoch": 6.396951853134742, "grad_norm": 0.21109837293624878, "learning_rate": 3.6033287101248273e-06, "loss": 0.0036, "step": 9234 }, { "epoch": 6.397644613785937, "grad_norm": 0.16789598762989044, "learning_rate": 3.6026352288488214e-06, "loss": 0.0025, "step": 9235 }, { "epoch": 6.398337374437132, "grad_norm": 0.23829500377178192, "learning_rate": 3.601941747572816e-06, "loss": 0.0037, "step": 9236 }, { "epoch": 6.399030135088327, "grad_norm": 0.23452800512313843, "learning_rate": 3.60124826629681e-06, "loss": 0.0035, "step": 9237 }, { "epoch": 6.399722895739522, "grad_norm": 0.13841485977172852, "learning_rate": 3.6005547850208045e-06, "loss": 0.0027, "step": 9238 }, { "epoch": 6.400415656390717, "grad_norm": 0.20119313895702362, "learning_rate": 3.5998613037447994e-06, "loss": 0.0049, "step": 9239 }, { "epoch": 6.401108417041912, "grad_norm": 0.16879573464393616, "learning_rate": 3.5991678224687935e-06, "loss": 0.0032, "step": 9240 }, { "epoch": 6.401801177693107, "grad_norm": 0.1579677313566208, "learning_rate": 3.598474341192788e-06, "loss": 0.0023, "step": 9241 }, { "epoch": 6.402493938344302, "grad_norm": 0.5832002758979797, "learning_rate": 3.5977808599167825e-06, "loss": 0.0061, "step": 9242 }, { "epoch": 6.403186698995497, "grad_norm": 0.21203866600990295, "learning_rate": 3.5970873786407766e-06, "loss": 0.0037, "step": 9243 }, { "epoch": 6.403879459646692, "grad_norm": 0.39060166478157043, "learning_rate": 3.5963938973647715e-06, "loss": 0.0076, "step": 9244 }, { "epoch": 6.404572220297887, "grad_norm": 0.23987151682376862, "learning_rate": 3.595700416088766e-06, "loss": 0.0034, "step": 9245 }, { "epoch": 6.405264980949082, "grad_norm": 0.6974042057991028, "learning_rate": 3.59500693481276e-06, "loss": 0.0039, "step": 9246 }, { "epoch": 6.405957741600277, "grad_norm": 0.36157554388046265, "learning_rate": 3.5943134535367546e-06, "loss": 0.0047, "step": 9247 }, { "epoch": 6.406650502251472, "grad_norm": 0.44548600912094116, "learning_rate": 3.593619972260749e-06, "loss": 0.0058, "step": 9248 }, { "epoch": 6.407343262902667, "grad_norm": 0.21171928942203522, "learning_rate": 3.592926490984744e-06, "loss": 0.003, "step": 9249 }, { "epoch": 6.408036023553862, "grad_norm": 0.22173413634300232, "learning_rate": 3.592233009708738e-06, "loss": 0.0039, "step": 9250 }, { "epoch": 6.408728784205057, "grad_norm": 0.5000897645950317, "learning_rate": 3.5915395284327327e-06, "loss": 0.0059, "step": 9251 }, { "epoch": 6.409421544856253, "grad_norm": 0.18607904016971588, "learning_rate": 3.5908460471567268e-06, "loss": 0.0033, "step": 9252 }, { "epoch": 6.410114305507447, "grad_norm": 0.12777851521968842, "learning_rate": 3.5901525658807213e-06, "loss": 0.0029, "step": 9253 }, { "epoch": 6.410807066158642, "grad_norm": 0.13433578610420227, "learning_rate": 3.5894590846047162e-06, "loss": 0.0028, "step": 9254 }, { "epoch": 6.4114998268098375, "grad_norm": 0.19018560647964478, "learning_rate": 3.5887656033287103e-06, "loss": 0.0031, "step": 9255 }, { "epoch": 6.412192587461032, "grad_norm": 0.47062408924102783, "learning_rate": 3.588072122052705e-06, "loss": 0.0055, "step": 9256 }, { "epoch": 6.412885348112227, "grad_norm": 0.2807880938053131, "learning_rate": 3.5873786407766993e-06, "loss": 0.0046, "step": 9257 }, { "epoch": 6.4135781087634225, "grad_norm": 0.3895263671875, "learning_rate": 3.5866851595006934e-06, "loss": 0.0042, "step": 9258 }, { "epoch": 6.414270869414617, "grad_norm": 0.5025876760482788, "learning_rate": 3.5859916782246883e-06, "loss": 0.0045, "step": 9259 }, { "epoch": 6.414963630065812, "grad_norm": 0.14714136719703674, "learning_rate": 3.585298196948683e-06, "loss": 0.0032, "step": 9260 }, { "epoch": 6.415656390717007, "grad_norm": 0.23582592606544495, "learning_rate": 3.584604715672677e-06, "loss": 0.003, "step": 9261 }, { "epoch": 6.416349151368202, "grad_norm": 0.2524140179157257, "learning_rate": 3.5839112343966715e-06, "loss": 0.0042, "step": 9262 }, { "epoch": 6.417041912019397, "grad_norm": 0.29775092005729675, "learning_rate": 3.583217753120666e-06, "loss": 0.0048, "step": 9263 }, { "epoch": 6.417734672670592, "grad_norm": 0.14647287130355835, "learning_rate": 3.5825242718446605e-06, "loss": 0.0028, "step": 9264 }, { "epoch": 6.418427433321788, "grad_norm": 0.2190023958683014, "learning_rate": 3.581830790568655e-06, "loss": 0.0028, "step": 9265 }, { "epoch": 6.419120193972982, "grad_norm": 0.44877180457115173, "learning_rate": 3.5811373092926495e-06, "loss": 0.0031, "step": 9266 }, { "epoch": 6.419812954624177, "grad_norm": 0.2303936928510666, "learning_rate": 3.5804438280166436e-06, "loss": 0.0039, "step": 9267 }, { "epoch": 6.420505715275373, "grad_norm": 0.19964253902435303, "learning_rate": 3.579750346740638e-06, "loss": 0.0031, "step": 9268 }, { "epoch": 6.421198475926567, "grad_norm": 0.2720945477485657, "learning_rate": 3.579056865464633e-06, "loss": 0.0041, "step": 9269 }, { "epoch": 6.421891236577762, "grad_norm": 0.2579038441181183, "learning_rate": 3.578363384188627e-06, "loss": 0.0036, "step": 9270 }, { "epoch": 6.422583997228958, "grad_norm": 0.2768496572971344, "learning_rate": 3.5776699029126216e-06, "loss": 0.0026, "step": 9271 }, { "epoch": 6.423276757880153, "grad_norm": 0.3870190382003784, "learning_rate": 3.576976421636616e-06, "loss": 0.006, "step": 9272 }, { "epoch": 6.423969518531347, "grad_norm": 0.2708127200603485, "learning_rate": 3.5762829403606102e-06, "loss": 0.0037, "step": 9273 }, { "epoch": 6.4246622791825425, "grad_norm": 0.1558944284915924, "learning_rate": 3.575589459084605e-06, "loss": 0.0023, "step": 9274 }, { "epoch": 6.425355039833738, "grad_norm": 0.40814536809921265, "learning_rate": 3.5748959778085997e-06, "loss": 0.0054, "step": 9275 }, { "epoch": 6.426047800484932, "grad_norm": 0.20814627408981323, "learning_rate": 3.5742024965325937e-06, "loss": 0.0028, "step": 9276 }, { "epoch": 6.4267405611361275, "grad_norm": 0.19707806408405304, "learning_rate": 3.5735090152565883e-06, "loss": 0.0034, "step": 9277 }, { "epoch": 6.427433321787323, "grad_norm": 0.19731505215168, "learning_rate": 3.5728155339805828e-06, "loss": 0.0031, "step": 9278 }, { "epoch": 6.428126082438517, "grad_norm": 0.43673670291900635, "learning_rate": 3.5721220527045773e-06, "loss": 0.0065, "step": 9279 }, { "epoch": 6.428818843089712, "grad_norm": 0.46689996123313904, "learning_rate": 3.5714285714285718e-06, "loss": 0.0069, "step": 9280 }, { "epoch": 6.429511603740908, "grad_norm": 0.2391880601644516, "learning_rate": 3.5707350901525663e-06, "loss": 0.003, "step": 9281 }, { "epoch": 6.430204364392102, "grad_norm": 0.13410313427448273, "learning_rate": 3.5700416088765604e-06, "loss": 0.0029, "step": 9282 }, { "epoch": 6.430897125043297, "grad_norm": 0.3177035450935364, "learning_rate": 3.569348127600555e-06, "loss": 0.004, "step": 9283 }, { "epoch": 6.431589885694493, "grad_norm": 0.16396614909172058, "learning_rate": 3.56865464632455e-06, "loss": 0.003, "step": 9284 }, { "epoch": 6.432282646345688, "grad_norm": 0.23232145607471466, "learning_rate": 3.567961165048544e-06, "loss": 0.0035, "step": 9285 }, { "epoch": 6.432975406996882, "grad_norm": 0.30791035294532776, "learning_rate": 3.5672676837725384e-06, "loss": 0.0038, "step": 9286 }, { "epoch": 6.433668167648078, "grad_norm": 0.27299022674560547, "learning_rate": 3.566574202496533e-06, "loss": 0.0031, "step": 9287 }, { "epoch": 6.434360928299273, "grad_norm": 0.15564918518066406, "learning_rate": 3.565880721220527e-06, "loss": 0.0034, "step": 9288 }, { "epoch": 6.435053688950467, "grad_norm": 0.31566473841667175, "learning_rate": 3.565187239944522e-06, "loss": 0.0036, "step": 9289 }, { "epoch": 6.4357464496016625, "grad_norm": 0.1905096173286438, "learning_rate": 3.5644937586685165e-06, "loss": 0.0028, "step": 9290 }, { "epoch": 6.436439210252858, "grad_norm": 0.42711296677589417, "learning_rate": 3.5638002773925105e-06, "loss": 0.0041, "step": 9291 }, { "epoch": 6.437131970904053, "grad_norm": 0.2646515369415283, "learning_rate": 3.563106796116505e-06, "loss": 0.0032, "step": 9292 }, { "epoch": 6.4378247315552475, "grad_norm": 0.20329734683036804, "learning_rate": 3.562413314840499e-06, "loss": 0.0038, "step": 9293 }, { "epoch": 6.438517492206443, "grad_norm": 0.2283334732055664, "learning_rate": 3.561719833564494e-06, "loss": 0.003, "step": 9294 }, { "epoch": 6.439210252857638, "grad_norm": 0.27285119891166687, "learning_rate": 3.5610263522884886e-06, "loss": 0.0031, "step": 9295 }, { "epoch": 6.439903013508832, "grad_norm": 0.5273153781890869, "learning_rate": 3.560332871012483e-06, "loss": 0.0034, "step": 9296 }, { "epoch": 6.440595774160028, "grad_norm": 0.2023629993200302, "learning_rate": 3.559639389736477e-06, "loss": 0.0039, "step": 9297 }, { "epoch": 6.441288534811223, "grad_norm": 0.29897841811180115, "learning_rate": 3.5589459084604717e-06, "loss": 0.0042, "step": 9298 }, { "epoch": 6.441981295462417, "grad_norm": 0.16852521896362305, "learning_rate": 3.5582524271844666e-06, "loss": 0.0041, "step": 9299 }, { "epoch": 6.442674056113613, "grad_norm": 0.22829200327396393, "learning_rate": 3.5575589459084607e-06, "loss": 0.0046, "step": 9300 }, { "epoch": 6.443366816764808, "grad_norm": 0.43147629499435425, "learning_rate": 3.5568654646324552e-06, "loss": 0.0041, "step": 9301 }, { "epoch": 6.444059577416002, "grad_norm": 0.17013610899448395, "learning_rate": 3.5561719833564497e-06, "loss": 0.0026, "step": 9302 }, { "epoch": 6.444752338067198, "grad_norm": 0.19303420186042786, "learning_rate": 3.555478502080444e-06, "loss": 0.0036, "step": 9303 }, { "epoch": 6.445445098718393, "grad_norm": 0.2566279172897339, "learning_rate": 3.5547850208044387e-06, "loss": 0.0046, "step": 9304 }, { "epoch": 6.446137859369588, "grad_norm": 0.4512794315814972, "learning_rate": 3.5540915395284333e-06, "loss": 0.0043, "step": 9305 }, { "epoch": 6.4468306200207826, "grad_norm": 0.16259980201721191, "learning_rate": 3.5533980582524273e-06, "loss": 0.0028, "step": 9306 }, { "epoch": 6.447523380671978, "grad_norm": 0.15087482333183289, "learning_rate": 3.552704576976422e-06, "loss": 0.003, "step": 9307 }, { "epoch": 6.448216141323173, "grad_norm": 0.19279569387435913, "learning_rate": 3.552011095700416e-06, "loss": 0.0038, "step": 9308 }, { "epoch": 6.4489089019743675, "grad_norm": 0.2542977035045624, "learning_rate": 3.551317614424411e-06, "loss": 0.0026, "step": 9309 }, { "epoch": 6.449601662625563, "grad_norm": 0.20152418315410614, "learning_rate": 3.5506241331484054e-06, "loss": 0.0029, "step": 9310 }, { "epoch": 6.450294423276758, "grad_norm": 0.2344168871641159, "learning_rate": 3.5499306518724e-06, "loss": 0.0039, "step": 9311 }, { "epoch": 6.450987183927953, "grad_norm": 0.11551791429519653, "learning_rate": 3.549237170596394e-06, "loss": 0.0024, "step": 9312 }, { "epoch": 6.451679944579148, "grad_norm": 0.1495424211025238, "learning_rate": 3.5485436893203885e-06, "loss": 0.0026, "step": 9313 }, { "epoch": 6.452372705230343, "grad_norm": 0.36056405305862427, "learning_rate": 3.5478502080443834e-06, "loss": 0.0059, "step": 9314 }, { "epoch": 6.453065465881538, "grad_norm": 0.1673860251903534, "learning_rate": 3.5471567267683775e-06, "loss": 0.0028, "step": 9315 }, { "epoch": 6.453758226532733, "grad_norm": 0.46804893016815186, "learning_rate": 3.546463245492372e-06, "loss": 0.007, "step": 9316 }, { "epoch": 6.454450987183928, "grad_norm": 0.18465572595596313, "learning_rate": 3.545769764216366e-06, "loss": 0.0028, "step": 9317 }, { "epoch": 6.455143747835123, "grad_norm": 0.3037743866443634, "learning_rate": 3.5450762829403606e-06, "loss": 0.0035, "step": 9318 }, { "epoch": 6.455836508486318, "grad_norm": 0.5053547024726868, "learning_rate": 3.5443828016643555e-06, "loss": 0.0043, "step": 9319 }, { "epoch": 6.456529269137513, "grad_norm": 0.1676502823829651, "learning_rate": 3.54368932038835e-06, "loss": 0.0027, "step": 9320 }, { "epoch": 6.457222029788708, "grad_norm": 0.28507503867149353, "learning_rate": 3.542995839112344e-06, "loss": 0.0034, "step": 9321 }, { "epoch": 6.457914790439903, "grad_norm": 0.18971438705921173, "learning_rate": 3.5423023578363387e-06, "loss": 0.0035, "step": 9322 }, { "epoch": 6.458607551091098, "grad_norm": 0.26438286900520325, "learning_rate": 3.5416088765603327e-06, "loss": 0.0037, "step": 9323 }, { "epoch": 6.459300311742293, "grad_norm": 0.29785943031311035, "learning_rate": 3.5409153952843277e-06, "loss": 0.0039, "step": 9324 }, { "epoch": 6.459993072393488, "grad_norm": 0.21474888920783997, "learning_rate": 3.540221914008322e-06, "loss": 0.0034, "step": 9325 }, { "epoch": 6.460685833044683, "grad_norm": 0.1615288406610489, "learning_rate": 3.5395284327323167e-06, "loss": 0.003, "step": 9326 }, { "epoch": 6.461378593695878, "grad_norm": 0.25554540753364563, "learning_rate": 3.5388349514563108e-06, "loss": 0.0073, "step": 9327 }, { "epoch": 6.462071354347073, "grad_norm": 0.21433214843273163, "learning_rate": 3.5381414701803053e-06, "loss": 0.0031, "step": 9328 }, { "epoch": 6.462764114998268, "grad_norm": 0.2230767160654068, "learning_rate": 3.5374479889043002e-06, "loss": 0.0033, "step": 9329 }, { "epoch": 6.463456875649463, "grad_norm": 0.27657797932624817, "learning_rate": 3.5367545076282943e-06, "loss": 0.0036, "step": 9330 }, { "epoch": 6.464149636300658, "grad_norm": 0.22122372686862946, "learning_rate": 3.536061026352289e-06, "loss": 0.0036, "step": 9331 }, { "epoch": 6.464842396951854, "grad_norm": 0.2776499092578888, "learning_rate": 3.535367545076283e-06, "loss": 0.0034, "step": 9332 }, { "epoch": 6.465535157603048, "grad_norm": 0.3153015077114105, "learning_rate": 3.5346740638002774e-06, "loss": 0.0025, "step": 9333 }, { "epoch": 6.466227918254243, "grad_norm": 0.1284671276807785, "learning_rate": 3.5339805825242724e-06, "loss": 0.0022, "step": 9334 }, { "epoch": 6.4669206789054385, "grad_norm": 0.30792906880378723, "learning_rate": 3.533287101248267e-06, "loss": 0.004, "step": 9335 }, { "epoch": 6.467613439556633, "grad_norm": 0.6116307377815247, "learning_rate": 3.532593619972261e-06, "loss": 0.0028, "step": 9336 }, { "epoch": 6.468306200207828, "grad_norm": 0.12195632606744766, "learning_rate": 3.5319001386962555e-06, "loss": 0.0024, "step": 9337 }, { "epoch": 6.4689989608590235, "grad_norm": 0.20950274169445038, "learning_rate": 3.5312066574202495e-06, "loss": 0.003, "step": 9338 }, { "epoch": 6.469691721510218, "grad_norm": 0.30685269832611084, "learning_rate": 3.5305131761442445e-06, "loss": 0.0041, "step": 9339 }, { "epoch": 6.470384482161413, "grad_norm": 0.34830111265182495, "learning_rate": 3.529819694868239e-06, "loss": 0.0046, "step": 9340 }, { "epoch": 6.471077242812608, "grad_norm": 0.22837074100971222, "learning_rate": 3.5291262135922335e-06, "loss": 0.0027, "step": 9341 }, { "epoch": 6.471770003463803, "grad_norm": 0.38324078917503357, "learning_rate": 3.5284327323162276e-06, "loss": 0.0028, "step": 9342 }, { "epoch": 6.472462764114998, "grad_norm": 0.3997797667980194, "learning_rate": 3.527739251040222e-06, "loss": 0.0032, "step": 9343 }, { "epoch": 6.473155524766193, "grad_norm": 0.18856115639209747, "learning_rate": 3.527045769764217e-06, "loss": 0.0041, "step": 9344 }, { "epoch": 6.473848285417389, "grad_norm": 0.2529909610748291, "learning_rate": 3.526352288488211e-06, "loss": 0.0034, "step": 9345 }, { "epoch": 6.474541046068583, "grad_norm": 0.16508552432060242, "learning_rate": 3.5256588072122056e-06, "loss": 0.0025, "step": 9346 }, { "epoch": 6.475233806719778, "grad_norm": 0.14784452319145203, "learning_rate": 3.5249653259361997e-06, "loss": 0.0024, "step": 9347 }, { "epoch": 6.475926567370974, "grad_norm": 0.2928319573402405, "learning_rate": 3.5242718446601942e-06, "loss": 0.0038, "step": 9348 }, { "epoch": 6.476619328022168, "grad_norm": 0.2895059287548065, "learning_rate": 3.523578363384189e-06, "loss": 0.0043, "step": 9349 }, { "epoch": 6.477312088673363, "grad_norm": 0.3710174560546875, "learning_rate": 3.5228848821081837e-06, "loss": 0.0033, "step": 9350 }, { "epoch": 6.478004849324559, "grad_norm": 0.3162163197994232, "learning_rate": 3.5221914008321777e-06, "loss": 0.0033, "step": 9351 }, { "epoch": 6.478697609975754, "grad_norm": 0.29709571599960327, "learning_rate": 3.5214979195561723e-06, "loss": 0.0033, "step": 9352 }, { "epoch": 6.479390370626948, "grad_norm": 0.34101665019989014, "learning_rate": 3.5208044382801663e-06, "loss": 0.0055, "step": 9353 }, { "epoch": 6.4800831312781435, "grad_norm": 0.28122788667678833, "learning_rate": 3.5201109570041613e-06, "loss": 0.0037, "step": 9354 }, { "epoch": 6.480775891929339, "grad_norm": 0.16156338155269623, "learning_rate": 3.5194174757281558e-06, "loss": 0.0028, "step": 9355 }, { "epoch": 6.481468652580533, "grad_norm": 0.24210387468338013, "learning_rate": 3.51872399445215e-06, "loss": 0.0044, "step": 9356 }, { "epoch": 6.4821614132317285, "grad_norm": 0.30856701731681824, "learning_rate": 3.5180305131761444e-06, "loss": 0.0041, "step": 9357 }, { "epoch": 6.482854173882924, "grad_norm": 0.3893034756183624, "learning_rate": 3.517337031900139e-06, "loss": 0.0034, "step": 9358 }, { "epoch": 6.483546934534118, "grad_norm": 0.13166674971580505, "learning_rate": 3.516643550624134e-06, "loss": 0.0021, "step": 9359 }, { "epoch": 6.484239695185313, "grad_norm": 0.32749444246292114, "learning_rate": 3.515950069348128e-06, "loss": 0.0051, "step": 9360 }, { "epoch": 6.484932455836509, "grad_norm": 0.36538147926330566, "learning_rate": 3.5152565880721224e-06, "loss": 0.0043, "step": 9361 }, { "epoch": 6.485625216487703, "grad_norm": 0.32941997051239014, "learning_rate": 3.5145631067961165e-06, "loss": 0.0037, "step": 9362 }, { "epoch": 6.486317977138898, "grad_norm": 0.10568477213382721, "learning_rate": 3.513869625520111e-06, "loss": 0.0022, "step": 9363 }, { "epoch": 6.487010737790094, "grad_norm": 0.160566508769989, "learning_rate": 3.513176144244106e-06, "loss": 0.0025, "step": 9364 }, { "epoch": 6.487703498441289, "grad_norm": 0.16372741758823395, "learning_rate": 3.5124826629681005e-06, "loss": 0.0026, "step": 9365 }, { "epoch": 6.488396259092483, "grad_norm": 0.1735554039478302, "learning_rate": 3.5117891816920945e-06, "loss": 0.0027, "step": 9366 }, { "epoch": 6.489089019743679, "grad_norm": 0.34052911400794983, "learning_rate": 3.511095700416089e-06, "loss": 0.0037, "step": 9367 }, { "epoch": 6.489781780394874, "grad_norm": 0.47544294595718384, "learning_rate": 3.510402219140083e-06, "loss": 0.0071, "step": 9368 }, { "epoch": 6.490474541046068, "grad_norm": 0.28894108533859253, "learning_rate": 3.509708737864078e-06, "loss": 0.0043, "step": 9369 }, { "epoch": 6.4911673016972635, "grad_norm": 0.18008123338222504, "learning_rate": 3.5090152565880726e-06, "loss": 0.0027, "step": 9370 }, { "epoch": 6.491860062348459, "grad_norm": 0.17859375476837158, "learning_rate": 3.5083217753120667e-06, "loss": 0.0026, "step": 9371 }, { "epoch": 6.492552822999654, "grad_norm": 0.36944127082824707, "learning_rate": 3.507628294036061e-06, "loss": 0.0058, "step": 9372 }, { "epoch": 6.4932455836508485, "grad_norm": 0.13610002398490906, "learning_rate": 3.5069348127600557e-06, "loss": 0.0023, "step": 9373 }, { "epoch": 6.493938344302044, "grad_norm": 0.26454663276672363, "learning_rate": 3.5062413314840506e-06, "loss": 0.004, "step": 9374 }, { "epoch": 6.494631104953239, "grad_norm": 0.15831910073757172, "learning_rate": 3.5055478502080447e-06, "loss": 0.0029, "step": 9375 }, { "epoch": 6.495323865604433, "grad_norm": 0.44009876251220703, "learning_rate": 3.5048543689320392e-06, "loss": 0.0035, "step": 9376 }, { "epoch": 6.496016626255629, "grad_norm": 0.21103429794311523, "learning_rate": 3.5041608876560333e-06, "loss": 0.0039, "step": 9377 }, { "epoch": 6.496709386906824, "grad_norm": 0.6784105896949768, "learning_rate": 3.503467406380028e-06, "loss": 0.0049, "step": 9378 }, { "epoch": 6.497402147558018, "grad_norm": 0.14801089465618134, "learning_rate": 3.5027739251040228e-06, "loss": 0.0025, "step": 9379 }, { "epoch": 6.498094908209214, "grad_norm": 0.21305952966213226, "learning_rate": 3.502080443828017e-06, "loss": 0.0026, "step": 9380 }, { "epoch": 6.498787668860409, "grad_norm": 0.3113575279712677, "learning_rate": 3.5013869625520114e-06, "loss": 0.0046, "step": 9381 }, { "epoch": 6.499480429511603, "grad_norm": 0.569118082523346, "learning_rate": 3.500693481276006e-06, "loss": 0.0031, "step": 9382 }, { "epoch": 6.500173190162799, "grad_norm": 0.447287380695343, "learning_rate": 3.5e-06, "loss": 0.0047, "step": 9383 }, { "epoch": 6.500865950813994, "grad_norm": 0.2354438304901123, "learning_rate": 3.499306518723995e-06, "loss": 0.0039, "step": 9384 }, { "epoch": 6.501558711465189, "grad_norm": 0.2539510428905487, "learning_rate": 3.4986130374479894e-06, "loss": 0.003, "step": 9385 }, { "epoch": 6.5022514721163835, "grad_norm": 0.17222854495048523, "learning_rate": 3.4979195561719835e-06, "loss": 0.0024, "step": 9386 }, { "epoch": 6.502944232767579, "grad_norm": 0.21358181536197662, "learning_rate": 3.497226074895978e-06, "loss": 0.0037, "step": 9387 }, { "epoch": 6.503636993418774, "grad_norm": 0.15497736632823944, "learning_rate": 3.4965325936199725e-06, "loss": 0.0028, "step": 9388 }, { "epoch": 6.5043297540699685, "grad_norm": 0.15973694622516632, "learning_rate": 3.4958391123439674e-06, "loss": 0.0035, "step": 9389 }, { "epoch": 6.505022514721164, "grad_norm": 0.21442706882953644, "learning_rate": 3.4951456310679615e-06, "loss": 0.0035, "step": 9390 }, { "epoch": 6.505715275372359, "grad_norm": 0.3782165050506592, "learning_rate": 3.494452149791956e-06, "loss": 0.0073, "step": 9391 }, { "epoch": 6.506408036023554, "grad_norm": 0.2783690094947815, "learning_rate": 3.49375866851595e-06, "loss": 0.0034, "step": 9392 }, { "epoch": 6.507100796674749, "grad_norm": 0.13924583792686462, "learning_rate": 3.4930651872399446e-06, "loss": 0.0025, "step": 9393 }, { "epoch": 6.507793557325944, "grad_norm": 0.2684783935546875, "learning_rate": 3.4923717059639396e-06, "loss": 0.0049, "step": 9394 }, { "epoch": 6.508486317977139, "grad_norm": 0.25154218077659607, "learning_rate": 3.4916782246879336e-06, "loss": 0.0049, "step": 9395 }, { "epoch": 6.509179078628334, "grad_norm": 0.22599586844444275, "learning_rate": 3.490984743411928e-06, "loss": 0.0044, "step": 9396 }, { "epoch": 6.509871839279529, "grad_norm": 0.14814086258411407, "learning_rate": 3.4902912621359227e-06, "loss": 0.0027, "step": 9397 }, { "epoch": 6.510564599930724, "grad_norm": 0.241136834025383, "learning_rate": 3.4895977808599167e-06, "loss": 0.0037, "step": 9398 }, { "epoch": 6.511257360581919, "grad_norm": 0.6824232935905457, "learning_rate": 3.4889042995839117e-06, "loss": 0.0042, "step": 9399 }, { "epoch": 6.511950121233114, "grad_norm": 0.1383947730064392, "learning_rate": 3.488210818307906e-06, "loss": 0.0027, "step": 9400 }, { "epoch": 6.512642881884309, "grad_norm": 0.1966572403907776, "learning_rate": 3.4875173370319003e-06, "loss": 0.0027, "step": 9401 }, { "epoch": 6.513335642535504, "grad_norm": 0.2254183143377304, "learning_rate": 3.486823855755895e-06, "loss": 0.003, "step": 9402 }, { "epoch": 6.514028403186699, "grad_norm": 0.1570611596107483, "learning_rate": 3.4861303744798893e-06, "loss": 0.0027, "step": 9403 }, { "epoch": 6.514721163837894, "grad_norm": 0.29516130685806274, "learning_rate": 3.485436893203884e-06, "loss": 0.0038, "step": 9404 }, { "epoch": 6.515413924489089, "grad_norm": 0.12006194144487381, "learning_rate": 3.4847434119278783e-06, "loss": 0.0025, "step": 9405 }, { "epoch": 6.516106685140284, "grad_norm": 0.5796042680740356, "learning_rate": 3.484049930651873e-06, "loss": 0.0036, "step": 9406 }, { "epoch": 6.516799445791479, "grad_norm": 0.1770615428686142, "learning_rate": 3.483356449375867e-06, "loss": 0.0032, "step": 9407 }, { "epoch": 6.517492206442674, "grad_norm": 0.3312567472457886, "learning_rate": 3.4826629680998614e-06, "loss": 0.0044, "step": 9408 }, { "epoch": 6.518184967093869, "grad_norm": 0.20188383758068085, "learning_rate": 3.4819694868238564e-06, "loss": 0.003, "step": 9409 }, { "epoch": 6.518877727745064, "grad_norm": 0.2747334837913513, "learning_rate": 3.4812760055478504e-06, "loss": 0.0028, "step": 9410 }, { "epoch": 6.519570488396259, "grad_norm": 0.2189648300409317, "learning_rate": 3.480582524271845e-06, "loss": 0.0033, "step": 9411 }, { "epoch": 6.520263249047455, "grad_norm": 0.23750540614128113, "learning_rate": 3.4798890429958395e-06, "loss": 0.0049, "step": 9412 }, { "epoch": 6.520956009698649, "grad_norm": 0.6962100267410278, "learning_rate": 3.4791955617198336e-06, "loss": 0.0024, "step": 9413 }, { "epoch": 6.521648770349844, "grad_norm": 0.12846559286117554, "learning_rate": 3.4785020804438285e-06, "loss": 0.0022, "step": 9414 }, { "epoch": 6.5223415310010395, "grad_norm": 0.4862101972103119, "learning_rate": 3.477808599167823e-06, "loss": 0.0053, "step": 9415 }, { "epoch": 6.523034291652234, "grad_norm": 0.2817215025424957, "learning_rate": 3.477115117891817e-06, "loss": 0.0058, "step": 9416 }, { "epoch": 6.523727052303429, "grad_norm": 0.23052118718624115, "learning_rate": 3.4764216366158116e-06, "loss": 0.003, "step": 9417 }, { "epoch": 6.5244198129546245, "grad_norm": 0.1892869770526886, "learning_rate": 3.475728155339806e-06, "loss": 0.0028, "step": 9418 }, { "epoch": 6.525112573605819, "grad_norm": 0.49516624212265015, "learning_rate": 3.4750346740638006e-06, "loss": 0.0049, "step": 9419 }, { "epoch": 6.525805334257014, "grad_norm": 0.21081629395484924, "learning_rate": 3.474341192787795e-06, "loss": 0.0045, "step": 9420 }, { "epoch": 6.526498094908209, "grad_norm": 0.16002444922924042, "learning_rate": 3.4736477115117896e-06, "loss": 0.0034, "step": 9421 }, { "epoch": 6.527190855559404, "grad_norm": 0.4152107536792755, "learning_rate": 3.4729542302357837e-06, "loss": 0.0032, "step": 9422 }, { "epoch": 6.527883616210599, "grad_norm": 0.244332954287529, "learning_rate": 3.4722607489597782e-06, "loss": 0.0025, "step": 9423 }, { "epoch": 6.528576376861794, "grad_norm": 0.22425228357315063, "learning_rate": 3.471567267683773e-06, "loss": 0.0043, "step": 9424 }, { "epoch": 6.529269137512989, "grad_norm": 0.3269823491573334, "learning_rate": 3.4708737864077672e-06, "loss": 0.003, "step": 9425 }, { "epoch": 6.529961898164184, "grad_norm": 0.4144686162471771, "learning_rate": 3.4701803051317618e-06, "loss": 0.0064, "step": 9426 }, { "epoch": 6.530654658815379, "grad_norm": 0.25652065873146057, "learning_rate": 3.4694868238557563e-06, "loss": 0.0051, "step": 9427 }, { "epoch": 6.531347419466575, "grad_norm": 0.8899679183959961, "learning_rate": 3.4687933425797504e-06, "loss": 0.0044, "step": 9428 }, { "epoch": 6.532040180117769, "grad_norm": 0.12251868844032288, "learning_rate": 3.4680998613037453e-06, "loss": 0.0023, "step": 9429 }, { "epoch": 6.532732940768964, "grad_norm": 0.2892351448535919, "learning_rate": 3.46740638002774e-06, "loss": 0.004, "step": 9430 }, { "epoch": 6.5334257014201595, "grad_norm": 0.3377503752708435, "learning_rate": 3.466712898751734e-06, "loss": 0.0056, "step": 9431 }, { "epoch": 6.534118462071355, "grad_norm": 0.7726133465766907, "learning_rate": 3.4660194174757284e-06, "loss": 0.0044, "step": 9432 }, { "epoch": 6.534811222722549, "grad_norm": 0.27427834272384644, "learning_rate": 3.4653259361997225e-06, "loss": 0.0037, "step": 9433 }, { "epoch": 6.5355039833737445, "grad_norm": 0.3087518811225891, "learning_rate": 3.4646324549237174e-06, "loss": 0.0046, "step": 9434 }, { "epoch": 6.53619674402494, "grad_norm": 0.15330451726913452, "learning_rate": 3.463938973647712e-06, "loss": 0.0027, "step": 9435 }, { "epoch": 6.536889504676134, "grad_norm": 0.4843771755695343, "learning_rate": 3.4632454923717064e-06, "loss": 0.0036, "step": 9436 }, { "epoch": 6.5375822653273294, "grad_norm": 0.2843921184539795, "learning_rate": 3.4625520110957005e-06, "loss": 0.0036, "step": 9437 }, { "epoch": 6.538275025978525, "grad_norm": 0.2954002022743225, "learning_rate": 3.461858529819695e-06, "loss": 0.0045, "step": 9438 }, { "epoch": 6.538967786629719, "grad_norm": 0.23253130912780762, "learning_rate": 3.46116504854369e-06, "loss": 0.0031, "step": 9439 }, { "epoch": 6.539660547280914, "grad_norm": 0.2086314707994461, "learning_rate": 3.460471567267684e-06, "loss": 0.0029, "step": 9440 }, { "epoch": 6.54035330793211, "grad_norm": 0.40789994597435, "learning_rate": 3.4597780859916786e-06, "loss": 0.006, "step": 9441 }, { "epoch": 6.541046068583304, "grad_norm": 0.15205442905426025, "learning_rate": 3.459084604715673e-06, "loss": 0.0029, "step": 9442 }, { "epoch": 6.541738829234499, "grad_norm": 0.21897590160369873, "learning_rate": 3.458391123439667e-06, "loss": 0.0035, "step": 9443 }, { "epoch": 6.542431589885695, "grad_norm": 0.3793187737464905, "learning_rate": 3.457697642163662e-06, "loss": 0.0049, "step": 9444 }, { "epoch": 6.543124350536889, "grad_norm": 0.48277217149734497, "learning_rate": 3.4570041608876566e-06, "loss": 0.0062, "step": 9445 }, { "epoch": 6.543817111188084, "grad_norm": 0.2049214243888855, "learning_rate": 3.4563106796116507e-06, "loss": 0.0036, "step": 9446 }, { "epoch": 6.54450987183928, "grad_norm": 0.2654341459274292, "learning_rate": 3.455617198335645e-06, "loss": 0.0037, "step": 9447 }, { "epoch": 6.545202632490475, "grad_norm": 0.6070582270622253, "learning_rate": 3.4549237170596393e-06, "loss": 0.004, "step": 9448 }, { "epoch": 6.545895393141669, "grad_norm": 0.7338226437568665, "learning_rate": 3.4542302357836342e-06, "loss": 0.0083, "step": 9449 }, { "epoch": 6.5465881537928645, "grad_norm": 0.19648605585098267, "learning_rate": 3.4535367545076287e-06, "loss": 0.0043, "step": 9450 }, { "epoch": 6.54728091444406, "grad_norm": 0.436946839094162, "learning_rate": 3.4528432732316232e-06, "loss": 0.0039, "step": 9451 }, { "epoch": 6.547973675095255, "grad_norm": 0.2569490969181061, "learning_rate": 3.4521497919556173e-06, "loss": 0.0042, "step": 9452 }, { "epoch": 6.5486664357464495, "grad_norm": 0.3160373568534851, "learning_rate": 3.451456310679612e-06, "loss": 0.0038, "step": 9453 }, { "epoch": 6.549359196397645, "grad_norm": 0.23284515738487244, "learning_rate": 3.4507628294036068e-06, "loss": 0.0032, "step": 9454 }, { "epoch": 6.55005195704884, "grad_norm": 0.45133253931999207, "learning_rate": 3.450069348127601e-06, "loss": 0.0041, "step": 9455 }, { "epoch": 6.550744717700034, "grad_norm": 0.5596340298652649, "learning_rate": 3.4493758668515954e-06, "loss": 0.0057, "step": 9456 }, { "epoch": 6.55143747835123, "grad_norm": 0.4074881374835968, "learning_rate": 3.4486823855755894e-06, "loss": 0.0068, "step": 9457 }, { "epoch": 6.552130239002425, "grad_norm": 0.21876654028892517, "learning_rate": 3.447988904299584e-06, "loss": 0.0037, "step": 9458 }, { "epoch": 6.552822999653619, "grad_norm": 0.17608174681663513, "learning_rate": 3.447295423023579e-06, "loss": 0.0028, "step": 9459 }, { "epoch": 6.553515760304815, "grad_norm": 0.17134511470794678, "learning_rate": 3.4466019417475734e-06, "loss": 0.0032, "step": 9460 }, { "epoch": 6.55420852095601, "grad_norm": 0.8364170789718628, "learning_rate": 3.4459084604715675e-06, "loss": 0.0043, "step": 9461 }, { "epoch": 6.554901281607204, "grad_norm": 0.39484527707099915, "learning_rate": 3.445214979195562e-06, "loss": 0.0039, "step": 9462 }, { "epoch": 6.5555940422584, "grad_norm": 0.35075441002845764, "learning_rate": 3.444521497919556e-06, "loss": 0.006, "step": 9463 }, { "epoch": 6.556286802909595, "grad_norm": 0.278879851102829, "learning_rate": 3.443828016643551e-06, "loss": 0.0037, "step": 9464 }, { "epoch": 6.556979563560789, "grad_norm": 0.2113790512084961, "learning_rate": 3.4431345353675455e-06, "loss": 0.0037, "step": 9465 }, { "epoch": 6.5576723242119845, "grad_norm": 0.2821394205093384, "learning_rate": 3.44244105409154e-06, "loss": 0.0054, "step": 9466 }, { "epoch": 6.55836508486318, "grad_norm": 0.2962658107280731, "learning_rate": 3.441747572815534e-06, "loss": 0.0033, "step": 9467 }, { "epoch": 6.559057845514375, "grad_norm": 0.16165822744369507, "learning_rate": 3.4410540915395286e-06, "loss": 0.0029, "step": 9468 }, { "epoch": 6.5597506061655695, "grad_norm": 0.560104250907898, "learning_rate": 3.4403606102635236e-06, "loss": 0.0068, "step": 9469 }, { "epoch": 6.560443366816765, "grad_norm": 0.19999182224273682, "learning_rate": 3.4396671289875176e-06, "loss": 0.003, "step": 9470 }, { "epoch": 6.56113612746796, "grad_norm": 0.3037041425704956, "learning_rate": 3.438973647711512e-06, "loss": 0.0053, "step": 9471 }, { "epoch": 6.561828888119155, "grad_norm": 0.2861514985561371, "learning_rate": 3.4382801664355062e-06, "loss": 0.0031, "step": 9472 }, { "epoch": 6.56252164877035, "grad_norm": 0.24474790692329407, "learning_rate": 3.4375866851595008e-06, "loss": 0.0037, "step": 9473 }, { "epoch": 6.563214409421545, "grad_norm": 0.3315252959728241, "learning_rate": 3.4368932038834957e-06, "loss": 0.0054, "step": 9474 }, { "epoch": 6.56390717007274, "grad_norm": 0.24230967462062836, "learning_rate": 3.43619972260749e-06, "loss": 0.0051, "step": 9475 }, { "epoch": 6.564599930723935, "grad_norm": 0.25363656878471375, "learning_rate": 3.4355062413314843e-06, "loss": 0.0035, "step": 9476 }, { "epoch": 6.56529269137513, "grad_norm": 0.2314106523990631, "learning_rate": 3.434812760055479e-06, "loss": 0.0043, "step": 9477 }, { "epoch": 6.565985452026325, "grad_norm": 0.3255745470523834, "learning_rate": 3.434119278779473e-06, "loss": 0.0036, "step": 9478 }, { "epoch": 6.56667821267752, "grad_norm": 0.25917908549308777, "learning_rate": 3.433425797503468e-06, "loss": 0.0057, "step": 9479 }, { "epoch": 6.567370973328715, "grad_norm": 0.1794329434633255, "learning_rate": 3.4327323162274623e-06, "loss": 0.0031, "step": 9480 }, { "epoch": 6.56806373397991, "grad_norm": 0.31018608808517456, "learning_rate": 3.4320388349514564e-06, "loss": 0.0051, "step": 9481 }, { "epoch": 6.568756494631105, "grad_norm": 0.25144803524017334, "learning_rate": 3.431345353675451e-06, "loss": 0.005, "step": 9482 }, { "epoch": 6.5694492552823, "grad_norm": 0.7011420130729675, "learning_rate": 3.4306518723994454e-06, "loss": 0.0065, "step": 9483 }, { "epoch": 6.570142015933495, "grad_norm": 0.2495323121547699, "learning_rate": 3.4299583911234404e-06, "loss": 0.0044, "step": 9484 }, { "epoch": 6.5708347765846895, "grad_norm": 0.20269997417926788, "learning_rate": 3.4292649098474345e-06, "loss": 0.0038, "step": 9485 }, { "epoch": 6.571527537235885, "grad_norm": 0.29128703474998474, "learning_rate": 3.428571428571429e-06, "loss": 0.0043, "step": 9486 }, { "epoch": 6.57222029788708, "grad_norm": 0.48349523544311523, "learning_rate": 3.427877947295423e-06, "loss": 0.0046, "step": 9487 }, { "epoch": 6.572913058538275, "grad_norm": 0.2877331078052521, "learning_rate": 3.4271844660194176e-06, "loss": 0.0038, "step": 9488 }, { "epoch": 6.57360581918947, "grad_norm": 0.19083867967128754, "learning_rate": 3.4264909847434125e-06, "loss": 0.0034, "step": 9489 }, { "epoch": 6.574298579840665, "grad_norm": 0.39765849709510803, "learning_rate": 3.425797503467407e-06, "loss": 0.0044, "step": 9490 }, { "epoch": 6.57499134049186, "grad_norm": 0.39917293190956116, "learning_rate": 3.425104022191401e-06, "loss": 0.0048, "step": 9491 }, { "epoch": 6.575684101143056, "grad_norm": 0.3934260308742523, "learning_rate": 3.4244105409153956e-06, "loss": 0.0071, "step": 9492 }, { "epoch": 6.57637686179425, "grad_norm": 0.30203375220298767, "learning_rate": 3.4237170596393897e-06, "loss": 0.0041, "step": 9493 }, { "epoch": 6.577069622445445, "grad_norm": 0.29003268480300903, "learning_rate": 3.4230235783633846e-06, "loss": 0.0045, "step": 9494 }, { "epoch": 6.5777623830966405, "grad_norm": 0.30671316385269165, "learning_rate": 3.422330097087379e-06, "loss": 0.0047, "step": 9495 }, { "epoch": 6.578455143747835, "grad_norm": 0.44722458720207214, "learning_rate": 3.4216366158113732e-06, "loss": 0.0034, "step": 9496 }, { "epoch": 6.57914790439903, "grad_norm": 0.2024839073419571, "learning_rate": 3.4209431345353677e-06, "loss": 0.0041, "step": 9497 }, { "epoch": 6.5798406650502255, "grad_norm": 0.3256869316101074, "learning_rate": 3.4202496532593622e-06, "loss": 0.0035, "step": 9498 }, { "epoch": 6.58053342570142, "grad_norm": 0.19540810585021973, "learning_rate": 3.419556171983357e-06, "loss": 0.0027, "step": 9499 }, { "epoch": 6.581226186352615, "grad_norm": 0.664091169834137, "learning_rate": 3.4188626907073513e-06, "loss": 0.0035, "step": 9500 }, { "epoch": 6.58191894700381, "grad_norm": 0.3423573970794678, "learning_rate": 3.4181692094313458e-06, "loss": 0.0056, "step": 9501 }, { "epoch": 6.582611707655005, "grad_norm": 0.27303338050842285, "learning_rate": 3.41747572815534e-06, "loss": 0.0039, "step": 9502 }, { "epoch": 6.5833044683062, "grad_norm": 0.18630249798297882, "learning_rate": 3.4167822468793344e-06, "loss": 0.003, "step": 9503 }, { "epoch": 6.583997228957395, "grad_norm": 0.4994889795780182, "learning_rate": 3.4160887656033293e-06, "loss": 0.0059, "step": 9504 }, { "epoch": 6.58468998960859, "grad_norm": 0.37421154975891113, "learning_rate": 3.415395284327324e-06, "loss": 0.0077, "step": 9505 }, { "epoch": 6.585382750259785, "grad_norm": 0.17351457476615906, "learning_rate": 3.414701803051318e-06, "loss": 0.0031, "step": 9506 }, { "epoch": 6.58607551091098, "grad_norm": 0.2850160300731659, "learning_rate": 3.4140083217753124e-06, "loss": 0.0036, "step": 9507 }, { "epoch": 6.586768271562176, "grad_norm": 0.30493614077568054, "learning_rate": 3.4133148404993065e-06, "loss": 0.0044, "step": 9508 }, { "epoch": 6.58746103221337, "grad_norm": 0.7638171315193176, "learning_rate": 3.4126213592233014e-06, "loss": 0.0046, "step": 9509 }, { "epoch": 6.588153792864565, "grad_norm": 0.2794395685195923, "learning_rate": 3.411927877947296e-06, "loss": 0.0035, "step": 9510 }, { "epoch": 6.5888465535157605, "grad_norm": 0.41764628887176514, "learning_rate": 3.41123439667129e-06, "loss": 0.0053, "step": 9511 }, { "epoch": 6.589539314166955, "grad_norm": 0.5977731347084045, "learning_rate": 3.4105409153952845e-06, "loss": 0.0068, "step": 9512 }, { "epoch": 6.59023207481815, "grad_norm": 0.14589254558086395, "learning_rate": 3.409847434119279e-06, "loss": 0.0033, "step": 9513 }, { "epoch": 6.5909248354693455, "grad_norm": 0.22986231744289398, "learning_rate": 3.409153952843274e-06, "loss": 0.0041, "step": 9514 }, { "epoch": 6.591617596120541, "grad_norm": 0.266808420419693, "learning_rate": 3.408460471567268e-06, "loss": 0.0036, "step": 9515 }, { "epoch": 6.592310356771735, "grad_norm": 0.35455387830734253, "learning_rate": 3.4077669902912626e-06, "loss": 0.0061, "step": 9516 }, { "epoch": 6.59300311742293, "grad_norm": Infinity, "learning_rate": 3.4077669902912626e-06, "loss": 0.0046, "step": 9517 }, { "epoch": 6.593695878074126, "grad_norm": 0.28056949377059937, "learning_rate": 3.4070735090152566e-06, "loss": 0.0051, "step": 9518 }, { "epoch": 6.59438863872532, "grad_norm": 0.22811107337474823, "learning_rate": 3.406380027739251e-06, "loss": 0.0033, "step": 9519 }, { "epoch": 6.595081399376515, "grad_norm": 0.24492451548576355, "learning_rate": 3.405686546463246e-06, "loss": 0.0038, "step": 9520 }, { "epoch": 6.595774160027711, "grad_norm": 0.27364006638526917, "learning_rate": 3.40499306518724e-06, "loss": 0.0037, "step": 9521 }, { "epoch": 6.596466920678905, "grad_norm": 0.3937765955924988, "learning_rate": 3.4042995839112347e-06, "loss": 0.0031, "step": 9522 }, { "epoch": 6.5971596813301, "grad_norm": 0.5127364993095398, "learning_rate": 3.403606102635229e-06, "loss": 0.004, "step": 9523 }, { "epoch": 6.597852441981296, "grad_norm": 0.2622610032558441, "learning_rate": 3.4029126213592233e-06, "loss": 0.0041, "step": 9524 }, { "epoch": 6.59854520263249, "grad_norm": 0.2696210741996765, "learning_rate": 3.4022191400832182e-06, "loss": 0.0031, "step": 9525 }, { "epoch": 6.599237963283685, "grad_norm": 0.29049065709114075, "learning_rate": 3.4015256588072127e-06, "loss": 0.0049, "step": 9526 }, { "epoch": 6.599930723934881, "grad_norm": 0.48603224754333496, "learning_rate": 3.400832177531207e-06, "loss": 0.0055, "step": 9527 }, { "epoch": 6.600623484586076, "grad_norm": 0.33300310373306274, "learning_rate": 3.4001386962552013e-06, "loss": 0.004, "step": 9528 }, { "epoch": 6.60131624523727, "grad_norm": 0.25161683559417725, "learning_rate": 3.399445214979196e-06, "loss": 0.0032, "step": 9529 }, { "epoch": 6.6020090058884655, "grad_norm": 0.2611345946788788, "learning_rate": 3.3987517337031908e-06, "loss": 0.0042, "step": 9530 }, { "epoch": 6.602701766539661, "grad_norm": 0.23604142665863037, "learning_rate": 3.398058252427185e-06, "loss": 0.0044, "step": 9531 }, { "epoch": 6.603394527190855, "grad_norm": 0.2808261215686798, "learning_rate": 3.3973647711511794e-06, "loss": 0.0052, "step": 9532 }, { "epoch": 6.6040872878420505, "grad_norm": 0.17070677876472473, "learning_rate": 3.3966712898751735e-06, "loss": 0.0031, "step": 9533 }, { "epoch": 6.604780048493246, "grad_norm": 0.2397252768278122, "learning_rate": 3.395977808599168e-06, "loss": 0.0045, "step": 9534 }, { "epoch": 6.605472809144441, "grad_norm": 0.25312379002571106, "learning_rate": 3.395284327323163e-06, "loss": 0.0042, "step": 9535 }, { "epoch": 6.606165569795635, "grad_norm": 0.22014859318733215, "learning_rate": 3.394590846047157e-06, "loss": 0.0028, "step": 9536 }, { "epoch": 6.606858330446831, "grad_norm": 0.2628215551376343, "learning_rate": 3.3938973647711515e-06, "loss": 0.0042, "step": 9537 }, { "epoch": 6.607551091098026, "grad_norm": 0.36326444149017334, "learning_rate": 3.393203883495146e-06, "loss": 0.0041, "step": 9538 }, { "epoch": 6.60824385174922, "grad_norm": 0.15043947100639343, "learning_rate": 3.39251040221914e-06, "loss": 0.0027, "step": 9539 }, { "epoch": 6.608936612400416, "grad_norm": 0.18471574783325195, "learning_rate": 3.391816920943135e-06, "loss": 0.0034, "step": 9540 }, { "epoch": 6.609629373051611, "grad_norm": 0.2622855007648468, "learning_rate": 3.3911234396671295e-06, "loss": 0.0044, "step": 9541 }, { "epoch": 6.610322133702805, "grad_norm": 0.3304399251937866, "learning_rate": 3.3904299583911236e-06, "loss": 0.0036, "step": 9542 }, { "epoch": 6.611014894354001, "grad_norm": 0.19413425028324127, "learning_rate": 3.389736477115118e-06, "loss": 0.0046, "step": 9543 }, { "epoch": 6.611707655005196, "grad_norm": 0.2844732403755188, "learning_rate": 3.3890429958391126e-06, "loss": 0.0053, "step": 9544 }, { "epoch": 6.61240041565639, "grad_norm": 0.2866968512535095, "learning_rate": 3.388349514563107e-06, "loss": 0.0043, "step": 9545 }, { "epoch": 6.6130931763075855, "grad_norm": 0.2924291491508484, "learning_rate": 3.3876560332871017e-06, "loss": 0.0038, "step": 9546 }, { "epoch": 6.613785936958781, "grad_norm": 0.24228566884994507, "learning_rate": 3.386962552011096e-06, "loss": 0.005, "step": 9547 }, { "epoch": 6.614478697609976, "grad_norm": 0.31746983528137207, "learning_rate": 3.3862690707350903e-06, "loss": 0.0052, "step": 9548 }, { "epoch": 6.6151714582611705, "grad_norm": 0.18304447829723358, "learning_rate": 3.3855755894590848e-06, "loss": 0.0039, "step": 9549 }, { "epoch": 6.615864218912366, "grad_norm": 0.27996575832366943, "learning_rate": 3.3848821081830797e-06, "loss": 0.0042, "step": 9550 }, { "epoch": 6.616556979563561, "grad_norm": 0.35377106070518494, "learning_rate": 3.3841886269070738e-06, "loss": 0.0037, "step": 9551 }, { "epoch": 6.617249740214755, "grad_norm": 0.2977728247642517, "learning_rate": 3.3834951456310683e-06, "loss": 0.0038, "step": 9552 }, { "epoch": 6.617942500865951, "grad_norm": 0.40454521775245667, "learning_rate": 3.382801664355063e-06, "loss": 0.0061, "step": 9553 }, { "epoch": 6.618635261517146, "grad_norm": 0.2601086497306824, "learning_rate": 3.382108183079057e-06, "loss": 0.0048, "step": 9554 }, { "epoch": 6.619328022168341, "grad_norm": 0.2615302503108978, "learning_rate": 3.381414701803052e-06, "loss": 0.0044, "step": 9555 }, { "epoch": 6.620020782819536, "grad_norm": 0.19293847680091858, "learning_rate": 3.3807212205270463e-06, "loss": 0.0029, "step": 9556 }, { "epoch": 6.620713543470731, "grad_norm": 0.24798336625099182, "learning_rate": 3.3800277392510404e-06, "loss": 0.0046, "step": 9557 }, { "epoch": 6.621406304121926, "grad_norm": 0.266619473695755, "learning_rate": 3.379334257975035e-06, "loss": 0.0037, "step": 9558 }, { "epoch": 6.622099064773121, "grad_norm": 0.2793313264846802, "learning_rate": 3.3786407766990294e-06, "loss": 0.0041, "step": 9559 }, { "epoch": 6.622791825424316, "grad_norm": 0.20297564566135406, "learning_rate": 3.377947295423024e-06, "loss": 0.0037, "step": 9560 }, { "epoch": 6.623484586075511, "grad_norm": 0.3926437199115753, "learning_rate": 3.3772538141470185e-06, "loss": 0.0054, "step": 9561 }, { "epoch": 6.6241773467267056, "grad_norm": 0.27789077162742615, "learning_rate": 3.376560332871013e-06, "loss": 0.0039, "step": 9562 }, { "epoch": 6.624870107377901, "grad_norm": 0.3504057824611664, "learning_rate": 3.375866851595007e-06, "loss": 0.0046, "step": 9563 }, { "epoch": 6.625562868029096, "grad_norm": 0.7347978353500366, "learning_rate": 3.3751733703190016e-06, "loss": 0.0046, "step": 9564 }, { "epoch": 6.6262556286802905, "grad_norm": 0.2537793219089508, "learning_rate": 3.3744798890429957e-06, "loss": 0.0039, "step": 9565 }, { "epoch": 6.626948389331486, "grad_norm": 0.36182963848114014, "learning_rate": 3.3737864077669906e-06, "loss": 0.0034, "step": 9566 }, { "epoch": 6.627641149982681, "grad_norm": 0.14422914385795593, "learning_rate": 3.373092926490985e-06, "loss": 0.0025, "step": 9567 }, { "epoch": 6.628333910633876, "grad_norm": 0.25976353883743286, "learning_rate": 3.3723994452149796e-06, "loss": 0.0043, "step": 9568 }, { "epoch": 6.629026671285071, "grad_norm": 0.3340349495410919, "learning_rate": 3.3717059639389737e-06, "loss": 0.0063, "step": 9569 }, { "epoch": 6.629719431936266, "grad_norm": 0.46246954798698425, "learning_rate": 3.371012482662968e-06, "loss": 0.0049, "step": 9570 }, { "epoch": 6.630412192587461, "grad_norm": 0.2905452251434326, "learning_rate": 3.370319001386963e-06, "loss": 0.0051, "step": 9571 }, { "epoch": 6.631104953238656, "grad_norm": 0.26880714297294617, "learning_rate": 3.3696255201109572e-06, "loss": 0.0032, "step": 9572 }, { "epoch": 6.631797713889851, "grad_norm": 0.2723860740661621, "learning_rate": 3.3689320388349517e-06, "loss": 0.0046, "step": 9573 }, { "epoch": 6.632490474541046, "grad_norm": 0.23139895498752594, "learning_rate": 3.368238557558946e-06, "loss": 0.0046, "step": 9574 }, { "epoch": 6.6331832351922415, "grad_norm": 0.40411585569381714, "learning_rate": 3.3675450762829403e-06, "loss": 0.0039, "step": 9575 }, { "epoch": 6.633875995843436, "grad_norm": 0.20245085656642914, "learning_rate": 3.3668515950069353e-06, "loss": 0.0043, "step": 9576 }, { "epoch": 6.634568756494631, "grad_norm": 0.1982521414756775, "learning_rate": 3.3661581137309298e-06, "loss": 0.003, "step": 9577 }, { "epoch": 6.6352615171458265, "grad_norm": 0.290549099445343, "learning_rate": 3.365464632454924e-06, "loss": 0.0048, "step": 9578 }, { "epoch": 6.635954277797021, "grad_norm": 0.23341302573680878, "learning_rate": 3.3647711511789184e-06, "loss": 0.005, "step": 9579 }, { "epoch": 6.636647038448216, "grad_norm": 0.31134483218193054, "learning_rate": 3.3640776699029125e-06, "loss": 0.0039, "step": 9580 }, { "epoch": 6.637339799099411, "grad_norm": 0.26159578561782837, "learning_rate": 3.3633841886269074e-06, "loss": 0.0043, "step": 9581 }, { "epoch": 6.638032559750606, "grad_norm": 0.22782525420188904, "learning_rate": 3.362690707350902e-06, "loss": 0.0036, "step": 9582 }, { "epoch": 6.638725320401801, "grad_norm": 0.21495933830738068, "learning_rate": 3.3619972260748964e-06, "loss": 0.0031, "step": 9583 }, { "epoch": 6.639418081052996, "grad_norm": 0.13852988183498383, "learning_rate": 3.3613037447988905e-06, "loss": 0.0026, "step": 9584 }, { "epoch": 6.640110841704191, "grad_norm": 0.3558557331562042, "learning_rate": 3.360610263522885e-06, "loss": 0.0043, "step": 9585 }, { "epoch": 6.640803602355386, "grad_norm": 0.32118579745292664, "learning_rate": 3.35991678224688e-06, "loss": 0.0037, "step": 9586 }, { "epoch": 6.641496363006581, "grad_norm": 0.25010016560554504, "learning_rate": 3.359223300970874e-06, "loss": 0.0035, "step": 9587 }, { "epoch": 6.642189123657777, "grad_norm": 0.1745690405368805, "learning_rate": 3.3585298196948685e-06, "loss": 0.0026, "step": 9588 }, { "epoch": 6.642881884308971, "grad_norm": 0.20617301762104034, "learning_rate": 3.3578363384188626e-06, "loss": 0.0043, "step": 9589 }, { "epoch": 6.643574644960166, "grad_norm": 0.3337268829345703, "learning_rate": 3.357142857142857e-06, "loss": 0.0042, "step": 9590 }, { "epoch": 6.6442674056113615, "grad_norm": 0.37391218543052673, "learning_rate": 3.356449375866852e-06, "loss": 0.0046, "step": 9591 }, { "epoch": 6.644960166262556, "grad_norm": 0.42142197489738464, "learning_rate": 3.3557558945908466e-06, "loss": 0.0031, "step": 9592 }, { "epoch": 6.645652926913751, "grad_norm": 0.36293545365333557, "learning_rate": 3.3550624133148407e-06, "loss": 0.0036, "step": 9593 }, { "epoch": 6.6463456875649465, "grad_norm": 0.10082734376192093, "learning_rate": 3.354368932038835e-06, "loss": 0.002, "step": 9594 }, { "epoch": 6.647038448216142, "grad_norm": 0.386010080575943, "learning_rate": 3.3536754507628293e-06, "loss": 0.0034, "step": 9595 }, { "epoch": 6.647731208867336, "grad_norm": 0.3421812355518341, "learning_rate": 3.352981969486824e-06, "loss": 0.005, "step": 9596 }, { "epoch": 6.648423969518531, "grad_norm": 0.3279930353164673, "learning_rate": 3.3522884882108187e-06, "loss": 0.0035, "step": 9597 }, { "epoch": 6.649116730169727, "grad_norm": 0.17352928221225739, "learning_rate": 3.3515950069348128e-06, "loss": 0.0037, "step": 9598 }, { "epoch": 6.649809490820921, "grad_norm": 0.16105639934539795, "learning_rate": 3.3509015256588073e-06, "loss": 0.0025, "step": 9599 }, { "epoch": 6.650502251472116, "grad_norm": 0.44193294644355774, "learning_rate": 3.350208044382802e-06, "loss": 0.0048, "step": 9600 }, { "epoch": 6.651195012123312, "grad_norm": 0.21000400185585022, "learning_rate": 3.3495145631067967e-06, "loss": 0.0029, "step": 9601 }, { "epoch": 6.651887772774506, "grad_norm": 0.2680608630180359, "learning_rate": 3.348821081830791e-06, "loss": 0.0039, "step": 9602 }, { "epoch": 6.652580533425701, "grad_norm": 0.45542842149734497, "learning_rate": 3.3481276005547853e-06, "loss": 0.0046, "step": 9603 }, { "epoch": 6.653273294076897, "grad_norm": 0.27576544880867004, "learning_rate": 3.3474341192787794e-06, "loss": 0.0037, "step": 9604 }, { "epoch": 6.653966054728091, "grad_norm": 0.31914380192756653, "learning_rate": 3.346740638002774e-06, "loss": 0.0042, "step": 9605 }, { "epoch": 6.654658815379286, "grad_norm": 0.44567424058914185, "learning_rate": 3.346047156726769e-06, "loss": 0.0065, "step": 9606 }, { "epoch": 6.6553515760304816, "grad_norm": 0.24427129328250885, "learning_rate": 3.3453536754507634e-06, "loss": 0.0038, "step": 9607 }, { "epoch": 6.656044336681677, "grad_norm": 0.24412909150123596, "learning_rate": 3.3446601941747575e-06, "loss": 0.0031, "step": 9608 }, { "epoch": 6.656737097332871, "grad_norm": 0.3295021653175354, "learning_rate": 3.343966712898752e-06, "loss": 0.0037, "step": 9609 }, { "epoch": 6.6574298579840665, "grad_norm": 0.2805141806602478, "learning_rate": 3.343273231622746e-06, "loss": 0.0057, "step": 9610 }, { "epoch": 6.658122618635262, "grad_norm": 0.28327521681785583, "learning_rate": 3.342579750346741e-06, "loss": 0.0049, "step": 9611 }, { "epoch": 6.658815379286456, "grad_norm": 0.20474810898303986, "learning_rate": 3.3418862690707355e-06, "loss": 0.0038, "step": 9612 }, { "epoch": 6.6595081399376514, "grad_norm": 0.42438840866088867, "learning_rate": 3.3411927877947296e-06, "loss": 0.0061, "step": 9613 }, { "epoch": 6.660200900588847, "grad_norm": 0.1490003913640976, "learning_rate": 3.340499306518724e-06, "loss": 0.0034, "step": 9614 }, { "epoch": 6.660893661240042, "grad_norm": 0.4401046931743622, "learning_rate": 3.3398058252427186e-06, "loss": 0.0041, "step": 9615 }, { "epoch": 6.661586421891236, "grad_norm": 0.1705765724182129, "learning_rate": 3.3391123439667135e-06, "loss": 0.0027, "step": 9616 }, { "epoch": 6.662279182542432, "grad_norm": 0.30720090866088867, "learning_rate": 3.3384188626907076e-06, "loss": 0.0044, "step": 9617 }, { "epoch": 6.662971943193627, "grad_norm": 0.33989569544792175, "learning_rate": 3.337725381414702e-06, "loss": 0.0044, "step": 9618 }, { "epoch": 6.663664703844821, "grad_norm": 0.20146344602108002, "learning_rate": 3.3370319001386962e-06, "loss": 0.0039, "step": 9619 }, { "epoch": 6.664357464496017, "grad_norm": 0.180581197142601, "learning_rate": 3.3363384188626907e-06, "loss": 0.0041, "step": 9620 }, { "epoch": 6.665050225147212, "grad_norm": 0.18227854371070862, "learning_rate": 3.3356449375866857e-06, "loss": 0.0032, "step": 9621 }, { "epoch": 6.665742985798406, "grad_norm": 0.7514982223510742, "learning_rate": 3.3349514563106797e-06, "loss": 0.0044, "step": 9622 }, { "epoch": 6.666435746449602, "grad_norm": 0.2514946162700653, "learning_rate": 3.3342579750346743e-06, "loss": 0.0035, "step": 9623 }, { "epoch": 6.667128507100797, "grad_norm": 0.2893274128437042, "learning_rate": 3.3335644937586688e-06, "loss": 0.0055, "step": 9624 }, { "epoch": 6.667821267751991, "grad_norm": 0.3611743748188019, "learning_rate": 3.332871012482663e-06, "loss": 0.0048, "step": 9625 }, { "epoch": 6.6685140284031865, "grad_norm": 0.2338365763425827, "learning_rate": 3.332177531206658e-06, "loss": 0.0033, "step": 9626 }, { "epoch": 6.669206789054382, "grad_norm": 0.22902587056159973, "learning_rate": 3.3314840499306523e-06, "loss": 0.0037, "step": 9627 }, { "epoch": 6.669899549705577, "grad_norm": 0.24569883942604065, "learning_rate": 3.3307905686546464e-06, "loss": 0.0038, "step": 9628 }, { "epoch": 6.6705923103567715, "grad_norm": 0.723281979560852, "learning_rate": 3.330097087378641e-06, "loss": 0.005, "step": 9629 }, { "epoch": 6.671285071007967, "grad_norm": 0.27090781927108765, "learning_rate": 3.3294036061026354e-06, "loss": 0.0038, "step": 9630 }, { "epoch": 6.671977831659162, "grad_norm": 0.44531315565109253, "learning_rate": 3.3287101248266303e-06, "loss": 0.0037, "step": 9631 }, { "epoch": 6.672670592310356, "grad_norm": 0.4721844494342804, "learning_rate": 3.3280166435506244e-06, "loss": 0.0067, "step": 9632 }, { "epoch": 6.673363352961552, "grad_norm": 0.2333526611328125, "learning_rate": 3.327323162274619e-06, "loss": 0.0038, "step": 9633 }, { "epoch": 6.674056113612747, "grad_norm": 0.22444242238998413, "learning_rate": 3.326629680998613e-06, "loss": 0.0048, "step": 9634 }, { "epoch": 6.674748874263942, "grad_norm": 0.7265631556510925, "learning_rate": 3.3259361997226075e-06, "loss": 0.0065, "step": 9635 }, { "epoch": 6.675441634915137, "grad_norm": 0.45793724060058594, "learning_rate": 3.3252427184466025e-06, "loss": 0.0038, "step": 9636 }, { "epoch": 6.676134395566332, "grad_norm": 0.21917425096035004, "learning_rate": 3.3245492371705966e-06, "loss": 0.0038, "step": 9637 }, { "epoch": 6.676827156217527, "grad_norm": 0.26209592819213867, "learning_rate": 3.323855755894591e-06, "loss": 0.0043, "step": 9638 }, { "epoch": 6.677519916868722, "grad_norm": 0.25796249508857727, "learning_rate": 3.3231622746185856e-06, "loss": 0.0048, "step": 9639 }, { "epoch": 6.678212677519917, "grad_norm": 0.26896247267723083, "learning_rate": 3.3224687933425797e-06, "loss": 0.0037, "step": 9640 }, { "epoch": 6.678905438171112, "grad_norm": 0.2653186023235321, "learning_rate": 3.3217753120665746e-06, "loss": 0.004, "step": 9641 }, { "epoch": 6.6795981988223065, "grad_norm": 0.16648922860622406, "learning_rate": 3.321081830790569e-06, "loss": 0.0035, "step": 9642 }, { "epoch": 6.680290959473502, "grad_norm": 0.26881250739097595, "learning_rate": 3.320388349514563e-06, "loss": 0.0042, "step": 9643 }, { "epoch": 6.680983720124697, "grad_norm": 0.2109580636024475, "learning_rate": 3.3196948682385577e-06, "loss": 0.0029, "step": 9644 }, { "epoch": 6.6816764807758915, "grad_norm": 0.38735663890838623, "learning_rate": 3.319001386962552e-06, "loss": 0.0059, "step": 9645 }, { "epoch": 6.682369241427087, "grad_norm": 0.17858567833900452, "learning_rate": 3.3183079056865467e-06, "loss": 0.0037, "step": 9646 }, { "epoch": 6.683062002078282, "grad_norm": 0.2825709879398346, "learning_rate": 3.3176144244105412e-06, "loss": 0.0043, "step": 9647 }, { "epoch": 6.683754762729477, "grad_norm": 0.18355581164360046, "learning_rate": 3.3169209431345357e-06, "loss": 0.0033, "step": 9648 }, { "epoch": 6.684447523380672, "grad_norm": 0.21532532572746277, "learning_rate": 3.31622746185853e-06, "loss": 0.0035, "step": 9649 }, { "epoch": 6.685140284031867, "grad_norm": 0.4155719578266144, "learning_rate": 3.3155339805825243e-06, "loss": 0.0043, "step": 9650 }, { "epoch": 6.685833044683062, "grad_norm": 0.4123566150665283, "learning_rate": 3.3148404993065193e-06, "loss": 0.006, "step": 9651 }, { "epoch": 6.686525805334257, "grad_norm": 0.236257404088974, "learning_rate": 3.3141470180305134e-06, "loss": 0.0027, "step": 9652 }, { "epoch": 6.687218565985452, "grad_norm": 0.31226691603660583, "learning_rate": 3.313453536754508e-06, "loss": 0.0047, "step": 9653 }, { "epoch": 6.687911326636647, "grad_norm": 0.35732463002204895, "learning_rate": 3.3127600554785024e-06, "loss": 0.0059, "step": 9654 }, { "epoch": 6.6886040872878425, "grad_norm": 0.3564273715019226, "learning_rate": 3.3120665742024965e-06, "loss": 0.0047, "step": 9655 }, { "epoch": 6.689296847939037, "grad_norm": 0.30896562337875366, "learning_rate": 3.3113730929264914e-06, "loss": 0.0059, "step": 9656 }, { "epoch": 6.689989608590232, "grad_norm": 0.7997479438781738, "learning_rate": 3.310679611650486e-06, "loss": 0.0056, "step": 9657 }, { "epoch": 6.6906823692414275, "grad_norm": 0.22391101717948914, "learning_rate": 3.30998613037448e-06, "loss": 0.0027, "step": 9658 }, { "epoch": 6.691375129892622, "grad_norm": 0.2137303203344345, "learning_rate": 3.3092926490984745e-06, "loss": 0.0031, "step": 9659 }, { "epoch": 6.692067890543817, "grad_norm": 0.14612312614917755, "learning_rate": 3.308599167822469e-06, "loss": 0.0025, "step": 9660 }, { "epoch": 6.692760651195012, "grad_norm": 0.47494062781333923, "learning_rate": 3.3079056865464635e-06, "loss": 0.0042, "step": 9661 }, { "epoch": 6.693453411846207, "grad_norm": 0.36330175399780273, "learning_rate": 3.307212205270458e-06, "loss": 0.0042, "step": 9662 }, { "epoch": 6.694146172497402, "grad_norm": 0.4094347059726715, "learning_rate": 3.3065187239944525e-06, "loss": 0.0049, "step": 9663 }, { "epoch": 6.694838933148597, "grad_norm": 0.2243693619966507, "learning_rate": 3.3058252427184466e-06, "loss": 0.0037, "step": 9664 }, { "epoch": 6.695531693799792, "grad_norm": 0.46385589241981506, "learning_rate": 3.305131761442441e-06, "loss": 0.0038, "step": 9665 }, { "epoch": 6.696224454450987, "grad_norm": 0.3777003288269043, "learning_rate": 3.304438280166436e-06, "loss": 0.0053, "step": 9666 }, { "epoch": 6.696917215102182, "grad_norm": 0.46151116490364075, "learning_rate": 3.30374479889043e-06, "loss": 0.0046, "step": 9667 }, { "epoch": 6.697609975753378, "grad_norm": 0.24645903706550598, "learning_rate": 3.3030513176144247e-06, "loss": 0.0027, "step": 9668 }, { "epoch": 6.698302736404572, "grad_norm": 0.3040834367275238, "learning_rate": 3.302357836338419e-06, "loss": 0.0037, "step": 9669 }, { "epoch": 6.698995497055767, "grad_norm": 0.3605433404445648, "learning_rate": 3.3016643550624133e-06, "loss": 0.006, "step": 9670 }, { "epoch": 6.6996882577069625, "grad_norm": 0.21150998771190643, "learning_rate": 3.300970873786408e-06, "loss": 0.0038, "step": 9671 }, { "epoch": 6.700381018358157, "grad_norm": 0.23658040165901184, "learning_rate": 3.3002773925104027e-06, "loss": 0.0033, "step": 9672 }, { "epoch": 6.701073779009352, "grad_norm": 0.19974714517593384, "learning_rate": 3.299583911234397e-06, "loss": 0.0034, "step": 9673 }, { "epoch": 6.7017665396605475, "grad_norm": 0.2631831467151642, "learning_rate": 3.2988904299583913e-06, "loss": 0.0039, "step": 9674 }, { "epoch": 6.702459300311743, "grad_norm": 0.22800016403198242, "learning_rate": 3.298196948682386e-06, "loss": 0.0033, "step": 9675 }, { "epoch": 6.703152060962937, "grad_norm": 0.1849680095911026, "learning_rate": 3.2975034674063803e-06, "loss": 0.0031, "step": 9676 }, { "epoch": 6.703844821614132, "grad_norm": 0.160861536860466, "learning_rate": 3.296809986130375e-06, "loss": 0.0024, "step": 9677 }, { "epoch": 6.704537582265328, "grad_norm": 0.2611650824546814, "learning_rate": 3.2961165048543693e-06, "loss": 0.0037, "step": 9678 }, { "epoch": 6.705230342916522, "grad_norm": 0.17863458395004272, "learning_rate": 3.2954230235783634e-06, "loss": 0.0033, "step": 9679 }, { "epoch": 6.705923103567717, "grad_norm": 0.37167879939079285, "learning_rate": 3.294729542302358e-06, "loss": 0.0059, "step": 9680 }, { "epoch": 6.706615864218913, "grad_norm": 0.22416667640209198, "learning_rate": 3.294036061026353e-06, "loss": 0.0048, "step": 9681 }, { "epoch": 6.707308624870107, "grad_norm": 0.3475745618343353, "learning_rate": 3.293342579750347e-06, "loss": 0.0046, "step": 9682 }, { "epoch": 6.708001385521302, "grad_norm": 0.20298218727111816, "learning_rate": 3.2926490984743415e-06, "loss": 0.0032, "step": 9683 }, { "epoch": 6.708694146172498, "grad_norm": 0.33679860830307007, "learning_rate": 3.291955617198336e-06, "loss": 0.0041, "step": 9684 }, { "epoch": 6.709386906823692, "grad_norm": 0.23559480905532837, "learning_rate": 3.29126213592233e-06, "loss": 0.0034, "step": 9685 }, { "epoch": 6.710079667474887, "grad_norm": 0.19997474551200867, "learning_rate": 3.290568654646325e-06, "loss": 0.0037, "step": 9686 }, { "epoch": 6.7107724281260825, "grad_norm": 0.1411164402961731, "learning_rate": 3.2898751733703195e-06, "loss": 0.0026, "step": 9687 }, { "epoch": 6.711465188777278, "grad_norm": 0.3438756763935089, "learning_rate": 3.2891816920943136e-06, "loss": 0.006, "step": 9688 }, { "epoch": 6.712157949428472, "grad_norm": 0.2956550419330597, "learning_rate": 3.288488210818308e-06, "loss": 0.0045, "step": 9689 }, { "epoch": 6.7128507100796675, "grad_norm": 0.327735036611557, "learning_rate": 3.287794729542302e-06, "loss": 0.0038, "step": 9690 }, { "epoch": 6.713543470730863, "grad_norm": 0.2549278736114502, "learning_rate": 3.287101248266297e-06, "loss": 0.0041, "step": 9691 }, { "epoch": 6.714236231382057, "grad_norm": 0.2906319200992584, "learning_rate": 3.2864077669902916e-06, "loss": 0.0037, "step": 9692 }, { "epoch": 6.714928992033252, "grad_norm": 0.19674977660179138, "learning_rate": 3.285714285714286e-06, "loss": 0.0027, "step": 9693 }, { "epoch": 6.715621752684448, "grad_norm": 0.6923297047615051, "learning_rate": 3.2850208044382802e-06, "loss": 0.0054, "step": 9694 }, { "epoch": 6.716314513335643, "grad_norm": 0.2080959528684616, "learning_rate": 3.2843273231622747e-06, "loss": 0.0029, "step": 9695 }, { "epoch": 6.717007273986837, "grad_norm": 0.3292155861854553, "learning_rate": 3.2836338418862697e-06, "loss": 0.0049, "step": 9696 }, { "epoch": 6.717700034638033, "grad_norm": 0.18534091114997864, "learning_rate": 3.2829403606102638e-06, "loss": 0.0045, "step": 9697 }, { "epoch": 6.718392795289228, "grad_norm": 0.11694613099098206, "learning_rate": 3.2822468793342583e-06, "loss": 0.002, "step": 9698 }, { "epoch": 6.719085555940422, "grad_norm": 0.24391762912273407, "learning_rate": 3.2815533980582528e-06, "loss": 0.0028, "step": 9699 }, { "epoch": 6.719778316591618, "grad_norm": 0.11731547862291336, "learning_rate": 3.280859916782247e-06, "loss": 0.0025, "step": 9700 }, { "epoch": 6.720471077242813, "grad_norm": 0.3219974637031555, "learning_rate": 3.280166435506242e-06, "loss": 0.0038, "step": 9701 }, { "epoch": 6.721163837894007, "grad_norm": 0.42592859268188477, "learning_rate": 3.2794729542302363e-06, "loss": 0.0046, "step": 9702 }, { "epoch": 6.721856598545203, "grad_norm": 0.3236500322818756, "learning_rate": 3.2787794729542304e-06, "loss": 0.0056, "step": 9703 }, { "epoch": 6.722549359196398, "grad_norm": 0.20326076447963715, "learning_rate": 3.278085991678225e-06, "loss": 0.0034, "step": 9704 }, { "epoch": 6.723242119847592, "grad_norm": 0.4166771471500397, "learning_rate": 3.277392510402219e-06, "loss": 0.0045, "step": 9705 }, { "epoch": 6.7239348804987875, "grad_norm": 0.32326415181159973, "learning_rate": 3.276699029126214e-06, "loss": 0.005, "step": 9706 }, { "epoch": 6.724627641149983, "grad_norm": 0.1668633371591568, "learning_rate": 3.2760055478502084e-06, "loss": 0.0027, "step": 9707 }, { "epoch": 6.725320401801178, "grad_norm": 0.21399752795696259, "learning_rate": 3.275312066574203e-06, "loss": 0.0046, "step": 9708 }, { "epoch": 6.7260131624523725, "grad_norm": 0.23568181693553925, "learning_rate": 3.274618585298197e-06, "loss": 0.0032, "step": 9709 }, { "epoch": 6.726705923103568, "grad_norm": 0.18345728516578674, "learning_rate": 3.2739251040221915e-06, "loss": 0.0041, "step": 9710 }, { "epoch": 6.727398683754763, "grad_norm": 0.28310561180114746, "learning_rate": 3.2732316227461865e-06, "loss": 0.003, "step": 9711 }, { "epoch": 6.728091444405957, "grad_norm": 0.5181269645690918, "learning_rate": 3.2725381414701806e-06, "loss": 0.0059, "step": 9712 }, { "epoch": 6.728784205057153, "grad_norm": 0.19630587100982666, "learning_rate": 3.271844660194175e-06, "loss": 0.0035, "step": 9713 }, { "epoch": 6.729476965708348, "grad_norm": 0.21144691109657288, "learning_rate": 3.271151178918169e-06, "loss": 0.0035, "step": 9714 }, { "epoch": 6.730169726359543, "grad_norm": 0.46461763978004456, "learning_rate": 3.2704576976421637e-06, "loss": 0.0049, "step": 9715 }, { "epoch": 6.730862487010738, "grad_norm": 0.2225027084350586, "learning_rate": 3.2697642163661586e-06, "loss": 0.0044, "step": 9716 }, { "epoch": 6.731555247661933, "grad_norm": 0.15940137207508087, "learning_rate": 3.269070735090153e-06, "loss": 0.003, "step": 9717 }, { "epoch": 6.732248008313128, "grad_norm": 0.2563859522342682, "learning_rate": 3.268377253814147e-06, "loss": 0.0034, "step": 9718 }, { "epoch": 6.732940768964323, "grad_norm": 0.23835010826587677, "learning_rate": 3.2676837725381417e-06, "loss": 0.0043, "step": 9719 }, { "epoch": 6.733633529615518, "grad_norm": 0.4149261713027954, "learning_rate": 3.266990291262136e-06, "loss": 0.0049, "step": 9720 }, { "epoch": 6.734326290266713, "grad_norm": 0.16650626063346863, "learning_rate": 3.2662968099861307e-06, "loss": 0.003, "step": 9721 }, { "epoch": 6.7350190509179075, "grad_norm": 0.37145712971687317, "learning_rate": 3.2656033287101252e-06, "loss": 0.0056, "step": 9722 }, { "epoch": 6.735711811569103, "grad_norm": 0.2124325931072235, "learning_rate": 3.2649098474341197e-06, "loss": 0.0039, "step": 9723 }, { "epoch": 6.736404572220298, "grad_norm": 0.4586735665798187, "learning_rate": 3.264216366158114e-06, "loss": 0.0059, "step": 9724 }, { "epoch": 6.7370973328714925, "grad_norm": 0.30123084783554077, "learning_rate": 3.2635228848821083e-06, "loss": 0.0066, "step": 9725 }, { "epoch": 6.737790093522688, "grad_norm": 0.24534186720848083, "learning_rate": 3.2628294036061033e-06, "loss": 0.0043, "step": 9726 }, { "epoch": 6.738482854173883, "grad_norm": 0.27212992310523987, "learning_rate": 3.2621359223300974e-06, "loss": 0.0043, "step": 9727 }, { "epoch": 6.739175614825078, "grad_norm": 0.3368600904941559, "learning_rate": 3.261442441054092e-06, "loss": 0.0045, "step": 9728 }, { "epoch": 6.739868375476273, "grad_norm": 0.46149179339408875, "learning_rate": 3.260748959778086e-06, "loss": 0.005, "step": 9729 }, { "epoch": 6.740561136127468, "grad_norm": 0.4382191002368927, "learning_rate": 3.2600554785020805e-06, "loss": 0.0035, "step": 9730 }, { "epoch": 6.741253896778663, "grad_norm": 0.1537993997335434, "learning_rate": 3.2593619972260754e-06, "loss": 0.0039, "step": 9731 }, { "epoch": 6.741946657429858, "grad_norm": 0.25139740109443665, "learning_rate": 3.25866851595007e-06, "loss": 0.0058, "step": 9732 }, { "epoch": 6.742639418081053, "grad_norm": 0.16098076105117798, "learning_rate": 3.257975034674064e-06, "loss": 0.0029, "step": 9733 }, { "epoch": 6.743332178732248, "grad_norm": 0.25203338265419006, "learning_rate": 3.2572815533980585e-06, "loss": 0.0042, "step": 9734 }, { "epoch": 6.7440249393834435, "grad_norm": 0.6023163199424744, "learning_rate": 3.2565880721220526e-06, "loss": 0.0064, "step": 9735 }, { "epoch": 6.744717700034638, "grad_norm": 0.18290142714977264, "learning_rate": 3.2558945908460475e-06, "loss": 0.0035, "step": 9736 }, { "epoch": 6.745410460685833, "grad_norm": 0.2506641447544098, "learning_rate": 3.255201109570042e-06, "loss": 0.0054, "step": 9737 }, { "epoch": 6.746103221337028, "grad_norm": 0.30213823914527893, "learning_rate": 3.254507628294036e-06, "loss": 0.0042, "step": 9738 }, { "epoch": 6.746795981988223, "grad_norm": 0.2335348129272461, "learning_rate": 3.2538141470180306e-06, "loss": 0.0034, "step": 9739 }, { "epoch": 6.747488742639418, "grad_norm": 0.26008355617523193, "learning_rate": 3.253120665742025e-06, "loss": 0.0031, "step": 9740 }, { "epoch": 6.748181503290613, "grad_norm": 0.46285054087638855, "learning_rate": 3.25242718446602e-06, "loss": 0.0048, "step": 9741 }, { "epoch": 6.748874263941808, "grad_norm": 0.16118820011615753, "learning_rate": 3.251733703190014e-06, "loss": 0.0029, "step": 9742 }, { "epoch": 6.749567024593003, "grad_norm": 0.22637991607189178, "learning_rate": 3.2510402219140087e-06, "loss": 0.0035, "step": 9743 }, { "epoch": 6.750259785244198, "grad_norm": 0.25915202498435974, "learning_rate": 3.2503467406380028e-06, "loss": 0.0038, "step": 9744 }, { "epoch": 6.750952545895393, "grad_norm": 0.31419217586517334, "learning_rate": 3.2496532593619973e-06, "loss": 0.0046, "step": 9745 }, { "epoch": 6.751645306546588, "grad_norm": 0.21347445249557495, "learning_rate": 3.248959778085992e-06, "loss": 0.0033, "step": 9746 }, { "epoch": 6.752338067197783, "grad_norm": 0.4536197781562805, "learning_rate": 3.2482662968099867e-06, "loss": 0.003, "step": 9747 }, { "epoch": 6.753030827848978, "grad_norm": 0.3877834379673004, "learning_rate": 3.247572815533981e-06, "loss": 0.0053, "step": 9748 }, { "epoch": 6.753723588500173, "grad_norm": 0.1879110336303711, "learning_rate": 3.2468793342579753e-06, "loss": 0.0024, "step": 9749 }, { "epoch": 6.754416349151368, "grad_norm": 0.2452160269021988, "learning_rate": 3.2461858529819694e-06, "loss": 0.0039, "step": 9750 }, { "epoch": 6.7551091098025635, "grad_norm": 0.48815909028053284, "learning_rate": 3.2454923717059643e-06, "loss": 0.0034, "step": 9751 }, { "epoch": 6.755801870453758, "grad_norm": 0.2948278784751892, "learning_rate": 3.244798890429959e-06, "loss": 0.0044, "step": 9752 }, { "epoch": 6.756494631104953, "grad_norm": 0.22150950133800507, "learning_rate": 3.244105409153953e-06, "loss": 0.0034, "step": 9753 }, { "epoch": 6.7571873917561485, "grad_norm": 0.2298438400030136, "learning_rate": 3.2434119278779474e-06, "loss": 0.0043, "step": 9754 }, { "epoch": 6.757880152407344, "grad_norm": 0.22544124722480774, "learning_rate": 3.242718446601942e-06, "loss": 0.0037, "step": 9755 }, { "epoch": 6.758572913058538, "grad_norm": 0.22202441096305847, "learning_rate": 3.242024965325937e-06, "loss": 0.0032, "step": 9756 }, { "epoch": 6.759265673709733, "grad_norm": 0.20543573796749115, "learning_rate": 3.241331484049931e-06, "loss": 0.0036, "step": 9757 }, { "epoch": 6.759958434360929, "grad_norm": 0.285784512758255, "learning_rate": 3.2406380027739255e-06, "loss": 0.003, "step": 9758 }, { "epoch": 6.760651195012123, "grad_norm": 0.25166475772857666, "learning_rate": 3.2399445214979196e-06, "loss": 0.0031, "step": 9759 }, { "epoch": 6.761343955663318, "grad_norm": 0.13722099363803864, "learning_rate": 3.239251040221914e-06, "loss": 0.0022, "step": 9760 }, { "epoch": 6.762036716314514, "grad_norm": 0.25914523005485535, "learning_rate": 3.238557558945909e-06, "loss": 0.0033, "step": 9761 }, { "epoch": 6.762729476965708, "grad_norm": 0.1303299069404602, "learning_rate": 3.237864077669903e-06, "loss": 0.0026, "step": 9762 }, { "epoch": 6.763422237616903, "grad_norm": 0.38397571444511414, "learning_rate": 3.2371705963938976e-06, "loss": 0.0037, "step": 9763 }, { "epoch": 6.764114998268099, "grad_norm": 0.2579515278339386, "learning_rate": 3.236477115117892e-06, "loss": 0.0039, "step": 9764 }, { "epoch": 6.764807758919293, "grad_norm": 0.1735793650150299, "learning_rate": 3.235783633841886e-06, "loss": 0.0025, "step": 9765 }, { "epoch": 6.765500519570488, "grad_norm": 0.18444368243217468, "learning_rate": 3.235090152565881e-06, "loss": 0.0038, "step": 9766 }, { "epoch": 6.7661932802216835, "grad_norm": 0.29714348912239075, "learning_rate": 3.2343966712898756e-06, "loss": 0.0042, "step": 9767 }, { "epoch": 6.766886040872878, "grad_norm": 0.29417502880096436, "learning_rate": 3.2337031900138697e-06, "loss": 0.0037, "step": 9768 }, { "epoch": 6.767578801524073, "grad_norm": 0.16488131880760193, "learning_rate": 3.2330097087378642e-06, "loss": 0.0028, "step": 9769 }, { "epoch": 6.7682715621752685, "grad_norm": 0.224967360496521, "learning_rate": 3.2323162274618587e-06, "loss": 0.0042, "step": 9770 }, { "epoch": 6.768964322826464, "grad_norm": 0.25119632482528687, "learning_rate": 3.2316227461858537e-06, "loss": 0.0032, "step": 9771 }, { "epoch": 6.769657083477658, "grad_norm": 0.2366628646850586, "learning_rate": 3.2309292649098478e-06, "loss": 0.0035, "step": 9772 }, { "epoch": 6.770349844128853, "grad_norm": 0.2102373242378235, "learning_rate": 3.2302357836338423e-06, "loss": 0.0029, "step": 9773 }, { "epoch": 6.771042604780049, "grad_norm": 0.2694167494773865, "learning_rate": 3.2295423023578364e-06, "loss": 0.0041, "step": 9774 }, { "epoch": 6.771735365431244, "grad_norm": 0.26727476716041565, "learning_rate": 3.228848821081831e-06, "loss": 0.0034, "step": 9775 }, { "epoch": 6.772428126082438, "grad_norm": 0.23914754390716553, "learning_rate": 3.228155339805826e-06, "loss": 0.0054, "step": 9776 }, { "epoch": 6.773120886733634, "grad_norm": 0.4744378626346588, "learning_rate": 3.22746185852982e-06, "loss": 0.0062, "step": 9777 }, { "epoch": 6.773813647384829, "grad_norm": 0.22059325873851776, "learning_rate": 3.2267683772538144e-06, "loss": 0.004, "step": 9778 }, { "epoch": 6.774506408036023, "grad_norm": 0.2267928123474121, "learning_rate": 3.226074895977809e-06, "loss": 0.0044, "step": 9779 }, { "epoch": 6.775199168687219, "grad_norm": 0.3121776878833771, "learning_rate": 3.225381414701803e-06, "loss": 0.0051, "step": 9780 }, { "epoch": 6.775891929338414, "grad_norm": 0.35926979780197144, "learning_rate": 3.224687933425798e-06, "loss": 0.0049, "step": 9781 }, { "epoch": 6.776584689989608, "grad_norm": 0.7782687544822693, "learning_rate": 3.2239944521497924e-06, "loss": 0.0033, "step": 9782 }, { "epoch": 6.777277450640804, "grad_norm": 0.37896329164505005, "learning_rate": 3.2233009708737865e-06, "loss": 0.0031, "step": 9783 }, { "epoch": 6.777970211291999, "grad_norm": 0.3360580503940582, "learning_rate": 3.222607489597781e-06, "loss": 0.0037, "step": 9784 }, { "epoch": 6.778662971943193, "grad_norm": 0.5456976890563965, "learning_rate": 3.2219140083217755e-06, "loss": 0.0077, "step": 9785 }, { "epoch": 6.7793557325943885, "grad_norm": 0.16573163866996765, "learning_rate": 3.22122052704577e-06, "loss": 0.0035, "step": 9786 }, { "epoch": 6.780048493245584, "grad_norm": 0.34573253989219666, "learning_rate": 3.2205270457697646e-06, "loss": 0.0062, "step": 9787 }, { "epoch": 6.780741253896778, "grad_norm": 0.28036412596702576, "learning_rate": 3.219833564493759e-06, "loss": 0.0038, "step": 9788 }, { "epoch": 6.7814340145479735, "grad_norm": 0.25258782505989075, "learning_rate": 3.219140083217753e-06, "loss": 0.0034, "step": 9789 }, { "epoch": 6.782126775199169, "grad_norm": 0.23531854152679443, "learning_rate": 3.2184466019417477e-06, "loss": 0.0031, "step": 9790 }, { "epoch": 6.782819535850364, "grad_norm": 0.3496544063091278, "learning_rate": 3.2177531206657426e-06, "loss": 0.0042, "step": 9791 }, { "epoch": 6.783512296501558, "grad_norm": 0.2098337858915329, "learning_rate": 3.2170596393897367e-06, "loss": 0.0038, "step": 9792 }, { "epoch": 6.784205057152754, "grad_norm": 0.3733336627483368, "learning_rate": 3.216366158113731e-06, "loss": 0.0046, "step": 9793 }, { "epoch": 6.784897817803949, "grad_norm": 0.5599023103713989, "learning_rate": 3.2156726768377257e-06, "loss": 0.0044, "step": 9794 }, { "epoch": 6.785590578455144, "grad_norm": 0.1788777858018875, "learning_rate": 3.21497919556172e-06, "loss": 0.003, "step": 9795 }, { "epoch": 6.786283339106339, "grad_norm": 0.15402033925056458, "learning_rate": 3.2142857142857147e-06, "loss": 0.0028, "step": 9796 }, { "epoch": 6.786976099757534, "grad_norm": 0.24567104876041412, "learning_rate": 3.2135922330097092e-06, "loss": 0.0051, "step": 9797 }, { "epoch": 6.787668860408729, "grad_norm": 0.33602938055992126, "learning_rate": 3.2128987517337033e-06, "loss": 0.0044, "step": 9798 }, { "epoch": 6.788361621059924, "grad_norm": 0.1702132374048233, "learning_rate": 3.212205270457698e-06, "loss": 0.0027, "step": 9799 }, { "epoch": 6.789054381711119, "grad_norm": 0.26751288771629333, "learning_rate": 3.2115117891816923e-06, "loss": 0.0039, "step": 9800 }, { "epoch": 6.789747142362314, "grad_norm": 0.312261164188385, "learning_rate": 3.210818307905687e-06, "loss": 0.0057, "step": 9801 }, { "epoch": 6.7904399030135085, "grad_norm": 0.27268752455711365, "learning_rate": 3.2101248266296814e-06, "loss": 0.0041, "step": 9802 }, { "epoch": 6.791132663664704, "grad_norm": 0.20988130569458008, "learning_rate": 3.209431345353676e-06, "loss": 0.0042, "step": 9803 }, { "epoch": 6.791825424315899, "grad_norm": 0.22025372087955475, "learning_rate": 3.20873786407767e-06, "loss": 0.0034, "step": 9804 }, { "epoch": 6.7925181849670935, "grad_norm": 0.3386022448539734, "learning_rate": 3.2080443828016645e-06, "loss": 0.0073, "step": 9805 }, { "epoch": 6.793210945618289, "grad_norm": 0.2105453461408615, "learning_rate": 3.2073509015256594e-06, "loss": 0.0035, "step": 9806 }, { "epoch": 6.793903706269484, "grad_norm": 0.45902219414711, "learning_rate": 3.2066574202496535e-06, "loss": 0.0062, "step": 9807 }, { "epoch": 6.794596466920678, "grad_norm": 0.13881872594356537, "learning_rate": 3.205963938973648e-06, "loss": 0.0031, "step": 9808 }, { "epoch": 6.795289227571874, "grad_norm": 0.21973024308681488, "learning_rate": 3.2052704576976425e-06, "loss": 0.0035, "step": 9809 }, { "epoch": 6.795981988223069, "grad_norm": 0.2734771966934204, "learning_rate": 3.2045769764216366e-06, "loss": 0.004, "step": 9810 }, { "epoch": 6.796674748874264, "grad_norm": 0.2917121648788452, "learning_rate": 3.2038834951456315e-06, "loss": 0.0046, "step": 9811 }, { "epoch": 6.797367509525459, "grad_norm": 0.32434505224227905, "learning_rate": 3.203190013869626e-06, "loss": 0.0036, "step": 9812 }, { "epoch": 6.798060270176654, "grad_norm": 0.2449207454919815, "learning_rate": 3.20249653259362e-06, "loss": 0.0044, "step": 9813 }, { "epoch": 6.798753030827849, "grad_norm": 0.6148754954338074, "learning_rate": 3.2018030513176146e-06, "loss": 0.004, "step": 9814 }, { "epoch": 6.7994457914790445, "grad_norm": 0.2852209508419037, "learning_rate": 3.201109570041609e-06, "loss": 0.0059, "step": 9815 }, { "epoch": 6.800138552130239, "grad_norm": 0.3754134178161621, "learning_rate": 3.2004160887656037e-06, "loss": 0.0042, "step": 9816 }, { "epoch": 6.800831312781434, "grad_norm": 0.30358991026878357, "learning_rate": 3.199722607489598e-06, "loss": 0.0037, "step": 9817 }, { "epoch": 6.801524073432629, "grad_norm": 0.25480136275291443, "learning_rate": 3.1990291262135927e-06, "loss": 0.0042, "step": 9818 }, { "epoch": 6.802216834083824, "grad_norm": 0.2729668617248535, "learning_rate": 3.1983356449375868e-06, "loss": 0.0044, "step": 9819 }, { "epoch": 6.802909594735019, "grad_norm": 0.4184122383594513, "learning_rate": 3.1976421636615813e-06, "loss": 0.006, "step": 9820 }, { "epoch": 6.803602355386214, "grad_norm": 0.3179779648780823, "learning_rate": 3.196948682385576e-06, "loss": 0.0033, "step": 9821 }, { "epoch": 6.804295116037409, "grad_norm": 0.4185744524002075, "learning_rate": 3.1962552011095703e-06, "loss": 0.0034, "step": 9822 }, { "epoch": 6.804987876688604, "grad_norm": 0.16288727521896362, "learning_rate": 3.195561719833565e-06, "loss": 0.0032, "step": 9823 }, { "epoch": 6.805680637339799, "grad_norm": 0.18902811408042908, "learning_rate": 3.1948682385575593e-06, "loss": 0.0032, "step": 9824 }, { "epoch": 6.806373397990994, "grad_norm": 0.16687580943107605, "learning_rate": 3.1941747572815534e-06, "loss": 0.0041, "step": 9825 }, { "epoch": 6.807066158642189, "grad_norm": 0.23390142619609833, "learning_rate": 3.1934812760055483e-06, "loss": 0.0033, "step": 9826 }, { "epoch": 6.807758919293384, "grad_norm": 0.26326286792755127, "learning_rate": 3.192787794729543e-06, "loss": 0.004, "step": 9827 }, { "epoch": 6.808451679944579, "grad_norm": 0.4257308840751648, "learning_rate": 3.192094313453537e-06, "loss": 0.0043, "step": 9828 }, { "epoch": 6.809144440595774, "grad_norm": 0.1480860710144043, "learning_rate": 3.1914008321775314e-06, "loss": 0.0026, "step": 9829 }, { "epoch": 6.809837201246969, "grad_norm": 0.23659858107566833, "learning_rate": 3.1907073509015255e-06, "loss": 0.0029, "step": 9830 }, { "epoch": 6.8105299618981645, "grad_norm": 0.4106503129005432, "learning_rate": 3.1900138696255205e-06, "loss": 0.0037, "step": 9831 }, { "epoch": 6.811222722549359, "grad_norm": 0.3442458212375641, "learning_rate": 3.189320388349515e-06, "loss": 0.0051, "step": 9832 }, { "epoch": 6.811915483200554, "grad_norm": 0.21609291434288025, "learning_rate": 3.1886269070735095e-06, "loss": 0.0032, "step": 9833 }, { "epoch": 6.8126082438517495, "grad_norm": 0.17556637525558472, "learning_rate": 3.1879334257975036e-06, "loss": 0.003, "step": 9834 }, { "epoch": 6.813301004502944, "grad_norm": 0.3292466402053833, "learning_rate": 3.187239944521498e-06, "loss": 0.0039, "step": 9835 }, { "epoch": 6.813993765154139, "grad_norm": 0.25917738676071167, "learning_rate": 3.186546463245493e-06, "loss": 0.0048, "step": 9836 }, { "epoch": 6.814686525805334, "grad_norm": 0.17207622528076172, "learning_rate": 3.185852981969487e-06, "loss": 0.0042, "step": 9837 }, { "epoch": 6.81537928645653, "grad_norm": 0.24776802957057953, "learning_rate": 3.1851595006934816e-06, "loss": 0.0043, "step": 9838 }, { "epoch": 6.816072047107724, "grad_norm": 0.32751888036727905, "learning_rate": 3.184466019417476e-06, "loss": 0.0045, "step": 9839 }, { "epoch": 6.816764807758919, "grad_norm": 0.11641302704811096, "learning_rate": 3.18377253814147e-06, "loss": 0.0023, "step": 9840 }, { "epoch": 6.817457568410115, "grad_norm": 0.450704425573349, "learning_rate": 3.183079056865465e-06, "loss": 0.0057, "step": 9841 }, { "epoch": 6.818150329061309, "grad_norm": 0.2343761920928955, "learning_rate": 3.1823855755894596e-06, "loss": 0.0033, "step": 9842 }, { "epoch": 6.818843089712504, "grad_norm": 0.29512107372283936, "learning_rate": 3.1816920943134537e-06, "loss": 0.0032, "step": 9843 }, { "epoch": 6.8195358503637, "grad_norm": 0.16802075505256653, "learning_rate": 3.1809986130374482e-06, "loss": 0.0029, "step": 9844 }, { "epoch": 6.820228611014894, "grad_norm": 0.16243338584899902, "learning_rate": 3.1803051317614423e-06, "loss": 0.0036, "step": 9845 }, { "epoch": 6.820921371666089, "grad_norm": 0.398639053106308, "learning_rate": 3.1796116504854373e-06, "loss": 0.0051, "step": 9846 }, { "epoch": 6.8216141323172845, "grad_norm": 0.317801833152771, "learning_rate": 3.1789181692094318e-06, "loss": 0.0059, "step": 9847 }, { "epoch": 6.822306892968479, "grad_norm": 0.6256863474845886, "learning_rate": 3.1782246879334263e-06, "loss": 0.0056, "step": 9848 }, { "epoch": 6.822999653619674, "grad_norm": 0.3846816420555115, "learning_rate": 3.1775312066574204e-06, "loss": 0.0054, "step": 9849 }, { "epoch": 6.8236924142708695, "grad_norm": 0.1945173144340515, "learning_rate": 3.176837725381415e-06, "loss": 0.004, "step": 9850 }, { "epoch": 6.824385174922065, "grad_norm": 0.21418678760528564, "learning_rate": 3.17614424410541e-06, "loss": 0.003, "step": 9851 }, { "epoch": 6.825077935573259, "grad_norm": 0.29843252897262573, "learning_rate": 3.175450762829404e-06, "loss": 0.0059, "step": 9852 }, { "epoch": 6.825770696224454, "grad_norm": 0.19400453567504883, "learning_rate": 3.1747572815533984e-06, "loss": 0.0034, "step": 9853 }, { "epoch": 6.82646345687565, "grad_norm": 0.2794119715690613, "learning_rate": 3.1740638002773925e-06, "loss": 0.0045, "step": 9854 }, { "epoch": 6.827156217526844, "grad_norm": 0.23559890687465668, "learning_rate": 3.173370319001387e-06, "loss": 0.0028, "step": 9855 }, { "epoch": 6.827848978178039, "grad_norm": 0.1915004700422287, "learning_rate": 3.172676837725382e-06, "loss": 0.0041, "step": 9856 }, { "epoch": 6.828541738829235, "grad_norm": 0.212208554148674, "learning_rate": 3.1719833564493764e-06, "loss": 0.0033, "step": 9857 }, { "epoch": 6.82923449948043, "grad_norm": 0.23424312472343445, "learning_rate": 3.1712898751733705e-06, "loss": 0.003, "step": 9858 }, { "epoch": 6.829927260131624, "grad_norm": 0.34941333532333374, "learning_rate": 3.170596393897365e-06, "loss": 0.0043, "step": 9859 }, { "epoch": 6.83062002078282, "grad_norm": 0.2985086143016815, "learning_rate": 3.169902912621359e-06, "loss": 0.0053, "step": 9860 }, { "epoch": 6.831312781434015, "grad_norm": 0.4434652328491211, "learning_rate": 3.169209431345354e-06, "loss": 0.0064, "step": 9861 }, { "epoch": 6.832005542085209, "grad_norm": 0.20575271546840668, "learning_rate": 3.1685159500693486e-06, "loss": 0.0034, "step": 9862 }, { "epoch": 6.8326983027364045, "grad_norm": 0.1959100216627121, "learning_rate": 3.167822468793343e-06, "loss": 0.0035, "step": 9863 }, { "epoch": 6.8333910633876, "grad_norm": 0.38107800483703613, "learning_rate": 3.167128987517337e-06, "loss": 0.0044, "step": 9864 }, { "epoch": 6.834083824038794, "grad_norm": 0.15644213557243347, "learning_rate": 3.1664355062413317e-06, "loss": 0.0025, "step": 9865 }, { "epoch": 6.8347765846899895, "grad_norm": 0.19525551795959473, "learning_rate": 3.1657420249653266e-06, "loss": 0.0032, "step": 9866 }, { "epoch": 6.835469345341185, "grad_norm": 0.29288479685783386, "learning_rate": 3.1650485436893207e-06, "loss": 0.0038, "step": 9867 }, { "epoch": 6.836162105992379, "grad_norm": 0.5829963684082031, "learning_rate": 3.164355062413315e-06, "loss": 0.0058, "step": 9868 }, { "epoch": 6.8368548666435744, "grad_norm": 0.21414701640605927, "learning_rate": 3.1636615811373093e-06, "loss": 0.0045, "step": 9869 }, { "epoch": 6.83754762729477, "grad_norm": 0.21531203389167786, "learning_rate": 3.162968099861304e-06, "loss": 0.0032, "step": 9870 }, { "epoch": 6.838240387945965, "grad_norm": 0.1464047133922577, "learning_rate": 3.1622746185852987e-06, "loss": 0.0026, "step": 9871 }, { "epoch": 6.838933148597159, "grad_norm": 0.20787616074085236, "learning_rate": 3.1615811373092932e-06, "loss": 0.003, "step": 9872 }, { "epoch": 6.839625909248355, "grad_norm": 0.7196558117866516, "learning_rate": 3.1608876560332873e-06, "loss": 0.0073, "step": 9873 }, { "epoch": 6.84031866989955, "grad_norm": 0.2806764841079712, "learning_rate": 3.160194174757282e-06, "loss": 0.0055, "step": 9874 }, { "epoch": 6.841011430550744, "grad_norm": 0.23197102546691895, "learning_rate": 3.159500693481276e-06, "loss": 0.0034, "step": 9875 }, { "epoch": 6.84170419120194, "grad_norm": 0.208356112241745, "learning_rate": 3.158807212205271e-06, "loss": 0.0031, "step": 9876 }, { "epoch": 6.842396951853135, "grad_norm": 0.35312914848327637, "learning_rate": 3.1581137309292654e-06, "loss": 0.0044, "step": 9877 }, { "epoch": 6.84308971250433, "grad_norm": 0.20556029677391052, "learning_rate": 3.1574202496532595e-06, "loss": 0.0036, "step": 9878 }, { "epoch": 6.843782473155525, "grad_norm": 0.1898297816514969, "learning_rate": 3.156726768377254e-06, "loss": 0.0036, "step": 9879 }, { "epoch": 6.84447523380672, "grad_norm": 0.1932491511106491, "learning_rate": 3.1560332871012485e-06, "loss": 0.0029, "step": 9880 }, { "epoch": 6.845167994457915, "grad_norm": 0.2936733365058899, "learning_rate": 3.1553398058252434e-06, "loss": 0.0039, "step": 9881 }, { "epoch": 6.8458607551091095, "grad_norm": 0.274087131023407, "learning_rate": 3.1546463245492375e-06, "loss": 0.0039, "step": 9882 }, { "epoch": 6.846553515760305, "grad_norm": 0.3380624055862427, "learning_rate": 3.153952843273232e-06, "loss": 0.007, "step": 9883 }, { "epoch": 6.8472462764115, "grad_norm": 0.14727269113063812, "learning_rate": 3.153259361997226e-06, "loss": 0.0026, "step": 9884 }, { "epoch": 6.8479390370626945, "grad_norm": 0.17885534465312958, "learning_rate": 3.1525658807212206e-06, "loss": 0.0035, "step": 9885 }, { "epoch": 6.84863179771389, "grad_norm": 0.32952526211738586, "learning_rate": 3.1518723994452155e-06, "loss": 0.0036, "step": 9886 }, { "epoch": 6.849324558365085, "grad_norm": 0.2858526110649109, "learning_rate": 3.15117891816921e-06, "loss": 0.004, "step": 9887 }, { "epoch": 6.850017319016279, "grad_norm": 0.21263548731803894, "learning_rate": 3.150485436893204e-06, "loss": 0.0031, "step": 9888 }, { "epoch": 6.850710079667475, "grad_norm": 0.17841656506061554, "learning_rate": 3.1497919556171986e-06, "loss": 0.0029, "step": 9889 }, { "epoch": 6.85140284031867, "grad_norm": 0.1647372841835022, "learning_rate": 3.1490984743411927e-06, "loss": 0.0025, "step": 9890 }, { "epoch": 6.852095600969865, "grad_norm": 0.1923486292362213, "learning_rate": 3.1484049930651877e-06, "loss": 0.0032, "step": 9891 }, { "epoch": 6.85278836162106, "grad_norm": 0.19028408825397491, "learning_rate": 3.147711511789182e-06, "loss": 0.0032, "step": 9892 }, { "epoch": 6.853481122272255, "grad_norm": 0.11258408427238464, "learning_rate": 3.1470180305131763e-06, "loss": 0.0022, "step": 9893 }, { "epoch": 6.85417388292345, "grad_norm": 0.23631267249584198, "learning_rate": 3.1463245492371708e-06, "loss": 0.003, "step": 9894 }, { "epoch": 6.854866643574645, "grad_norm": 0.14479823410511017, "learning_rate": 3.1456310679611653e-06, "loss": 0.0024, "step": 9895 }, { "epoch": 6.85555940422584, "grad_norm": 0.16995778679847717, "learning_rate": 3.1449375866851602e-06, "loss": 0.0027, "step": 9896 }, { "epoch": 6.856252164877035, "grad_norm": 0.27697867155075073, "learning_rate": 3.1442441054091543e-06, "loss": 0.0032, "step": 9897 }, { "epoch": 6.85694492552823, "grad_norm": 0.19249166548252106, "learning_rate": 3.143550624133149e-06, "loss": 0.0028, "step": 9898 }, { "epoch": 6.857637686179425, "grad_norm": 0.22699804604053497, "learning_rate": 3.142857142857143e-06, "loss": 0.0044, "step": 9899 }, { "epoch": 6.85833044683062, "grad_norm": 0.30924737453460693, "learning_rate": 3.1421636615811374e-06, "loss": 0.0048, "step": 9900 }, { "epoch": 6.859023207481815, "grad_norm": 0.25368955731391907, "learning_rate": 3.1414701803051323e-06, "loss": 0.0046, "step": 9901 }, { "epoch": 6.85971596813301, "grad_norm": 0.2596815526485443, "learning_rate": 3.1407766990291264e-06, "loss": 0.0039, "step": 9902 }, { "epoch": 6.860408728784205, "grad_norm": 0.28993862867355347, "learning_rate": 3.140083217753121e-06, "loss": 0.0047, "step": 9903 }, { "epoch": 6.8611014894354, "grad_norm": 0.23070913553237915, "learning_rate": 3.1393897364771154e-06, "loss": 0.0042, "step": 9904 }, { "epoch": 6.861794250086595, "grad_norm": 0.27373337745666504, "learning_rate": 3.1386962552011095e-06, "loss": 0.0038, "step": 9905 }, { "epoch": 6.86248701073779, "grad_norm": 0.17786487936973572, "learning_rate": 3.1380027739251045e-06, "loss": 0.0026, "step": 9906 }, { "epoch": 6.863179771388985, "grad_norm": 0.1329381763935089, "learning_rate": 3.137309292649099e-06, "loss": 0.0023, "step": 9907 }, { "epoch": 6.86387253204018, "grad_norm": 0.25502750277519226, "learning_rate": 3.136615811373093e-06, "loss": 0.0041, "step": 9908 }, { "epoch": 6.864565292691375, "grad_norm": 0.2774457037448883, "learning_rate": 3.1359223300970876e-06, "loss": 0.0042, "step": 9909 }, { "epoch": 6.86525805334257, "grad_norm": 0.20210881531238556, "learning_rate": 3.135228848821082e-06, "loss": 0.0032, "step": 9910 }, { "epoch": 6.8659508139937655, "grad_norm": 0.2541934549808502, "learning_rate": 3.134535367545077e-06, "loss": 0.0044, "step": 9911 }, { "epoch": 6.86664357464496, "grad_norm": 0.25267860293388367, "learning_rate": 3.133841886269071e-06, "loss": 0.0046, "step": 9912 }, { "epoch": 6.867336335296155, "grad_norm": 0.1430072784423828, "learning_rate": 3.1331484049930656e-06, "loss": 0.0023, "step": 9913 }, { "epoch": 6.8680290959473504, "grad_norm": 0.3600761592388153, "learning_rate": 3.1324549237170597e-06, "loss": 0.0043, "step": 9914 }, { "epoch": 6.868721856598545, "grad_norm": 0.2174580842256546, "learning_rate": 3.131761442441054e-06, "loss": 0.0033, "step": 9915 }, { "epoch": 6.86941461724974, "grad_norm": 0.49101021885871887, "learning_rate": 3.131067961165049e-06, "loss": 0.0045, "step": 9916 }, { "epoch": 6.870107377900935, "grad_norm": 0.10587871074676514, "learning_rate": 3.1303744798890432e-06, "loss": 0.002, "step": 9917 }, { "epoch": 6.870800138552131, "grad_norm": 0.32948845624923706, "learning_rate": 3.1296809986130377e-06, "loss": 0.0036, "step": 9918 }, { "epoch": 6.871492899203325, "grad_norm": 0.30850082635879517, "learning_rate": 3.1289875173370322e-06, "loss": 0.0039, "step": 9919 }, { "epoch": 6.87218565985452, "grad_norm": 0.3731090724468231, "learning_rate": 3.1282940360610263e-06, "loss": 0.0052, "step": 9920 }, { "epoch": 6.872878420505716, "grad_norm": 0.23356278240680695, "learning_rate": 3.1276005547850213e-06, "loss": 0.0022, "step": 9921 }, { "epoch": 6.87357118115691, "grad_norm": 0.14465276896953583, "learning_rate": 3.1269070735090158e-06, "loss": 0.0027, "step": 9922 }, { "epoch": 6.874263941808105, "grad_norm": 0.35693371295928955, "learning_rate": 3.12621359223301e-06, "loss": 0.0037, "step": 9923 }, { "epoch": 6.874956702459301, "grad_norm": 0.505262017250061, "learning_rate": 3.1255201109570044e-06, "loss": 0.0059, "step": 9924 }, { "epoch": 6.875649463110495, "grad_norm": 0.3666226267814636, "learning_rate": 3.124826629680999e-06, "loss": 0.0059, "step": 9925 }, { "epoch": 6.87634222376169, "grad_norm": 0.35455337166786194, "learning_rate": 3.124133148404993e-06, "loss": 0.006, "step": 9926 }, { "epoch": 6.8770349844128855, "grad_norm": 0.1944286972284317, "learning_rate": 3.123439667128988e-06, "loss": 0.0036, "step": 9927 }, { "epoch": 6.87772774506408, "grad_norm": 0.2120799571275711, "learning_rate": 3.1227461858529824e-06, "loss": 0.004, "step": 9928 }, { "epoch": 6.878420505715275, "grad_norm": 0.27662771940231323, "learning_rate": 3.1220527045769765e-06, "loss": 0.0029, "step": 9929 }, { "epoch": 6.8791132663664705, "grad_norm": 0.20927362143993378, "learning_rate": 3.121359223300971e-06, "loss": 0.0036, "step": 9930 }, { "epoch": 6.879806027017666, "grad_norm": 0.28317469358444214, "learning_rate": 3.120665742024965e-06, "loss": 0.0032, "step": 9931 }, { "epoch": 6.88049878766886, "grad_norm": 0.2607945501804352, "learning_rate": 3.11997226074896e-06, "loss": 0.0037, "step": 9932 }, { "epoch": 6.881191548320055, "grad_norm": 0.3832789659500122, "learning_rate": 3.1192787794729545e-06, "loss": 0.0053, "step": 9933 }, { "epoch": 6.881884308971251, "grad_norm": 0.2038079798221588, "learning_rate": 3.118585298196949e-06, "loss": 0.0027, "step": 9934 }, { "epoch": 6.882577069622445, "grad_norm": 0.3464046120643616, "learning_rate": 3.117891816920943e-06, "loss": 0.0045, "step": 9935 }, { "epoch": 6.88326983027364, "grad_norm": 0.3392944037914276, "learning_rate": 3.1171983356449376e-06, "loss": 0.0049, "step": 9936 }, { "epoch": 6.883962590924836, "grad_norm": 0.3029744327068329, "learning_rate": 3.1165048543689326e-06, "loss": 0.0035, "step": 9937 }, { "epoch": 6.884655351576031, "grad_norm": 0.15682591497898102, "learning_rate": 3.1158113730929267e-06, "loss": 0.0032, "step": 9938 }, { "epoch": 6.885348112227225, "grad_norm": 0.3812074661254883, "learning_rate": 3.115117891816921e-06, "loss": 0.0033, "step": 9939 }, { "epoch": 6.886040872878421, "grad_norm": 0.3690953850746155, "learning_rate": 3.1144244105409157e-06, "loss": 0.0058, "step": 9940 }, { "epoch": 6.886733633529616, "grad_norm": 0.298159658908844, "learning_rate": 3.1137309292649098e-06, "loss": 0.0047, "step": 9941 }, { "epoch": 6.88742639418081, "grad_norm": 0.40038490295410156, "learning_rate": 3.1130374479889047e-06, "loss": 0.0056, "step": 9942 }, { "epoch": 6.8881191548320055, "grad_norm": 0.1882338970899582, "learning_rate": 3.1123439667128992e-06, "loss": 0.004, "step": 9943 }, { "epoch": 6.888811915483201, "grad_norm": 0.20926852524280548, "learning_rate": 3.1116504854368933e-06, "loss": 0.0036, "step": 9944 }, { "epoch": 6.889504676134395, "grad_norm": 0.1722264587879181, "learning_rate": 3.110957004160888e-06, "loss": 0.0027, "step": 9945 }, { "epoch": 6.8901974367855905, "grad_norm": 0.23840396106243134, "learning_rate": 3.110263522884882e-06, "loss": 0.0029, "step": 9946 }, { "epoch": 6.890890197436786, "grad_norm": 0.2524208426475525, "learning_rate": 3.109570041608877e-06, "loss": 0.0044, "step": 9947 }, { "epoch": 6.89158295808798, "grad_norm": 0.40240007638931274, "learning_rate": 3.1088765603328713e-06, "loss": 0.0047, "step": 9948 }, { "epoch": 6.892275718739175, "grad_norm": 0.17475059628486633, "learning_rate": 3.108183079056866e-06, "loss": 0.0024, "step": 9949 }, { "epoch": 6.892968479390371, "grad_norm": 0.28309381008148193, "learning_rate": 3.10748959778086e-06, "loss": 0.003, "step": 9950 }, { "epoch": 6.893661240041566, "grad_norm": 0.1873553842306137, "learning_rate": 3.1067961165048544e-06, "loss": 0.0032, "step": 9951 }, { "epoch": 6.89435400069276, "grad_norm": 0.40987929701805115, "learning_rate": 3.1061026352288494e-06, "loss": 0.0039, "step": 9952 }, { "epoch": 6.895046761343956, "grad_norm": 0.28078773617744446, "learning_rate": 3.1054091539528435e-06, "loss": 0.0034, "step": 9953 }, { "epoch": 6.895739521995151, "grad_norm": 0.34553733468055725, "learning_rate": 3.104715672676838e-06, "loss": 0.0041, "step": 9954 }, { "epoch": 6.896432282646345, "grad_norm": 0.19245411455631256, "learning_rate": 3.1040221914008325e-06, "loss": 0.0028, "step": 9955 }, { "epoch": 6.897125043297541, "grad_norm": 0.8639165759086609, "learning_rate": 3.1033287101248266e-06, "loss": 0.0045, "step": 9956 }, { "epoch": 6.897817803948736, "grad_norm": 0.22493526339530945, "learning_rate": 3.1026352288488215e-06, "loss": 0.0041, "step": 9957 }, { "epoch": 6.898510564599931, "grad_norm": 0.22028587758541107, "learning_rate": 3.101941747572816e-06, "loss": 0.0036, "step": 9958 }, { "epoch": 6.899203325251126, "grad_norm": 0.21007047593593597, "learning_rate": 3.10124826629681e-06, "loss": 0.0037, "step": 9959 }, { "epoch": 6.899896085902321, "grad_norm": 0.3950655162334442, "learning_rate": 3.1005547850208046e-06, "loss": 0.0048, "step": 9960 }, { "epoch": 6.900588846553516, "grad_norm": 0.13465934991836548, "learning_rate": 3.0998613037447987e-06, "loss": 0.0024, "step": 9961 }, { "epoch": 6.9012816072047105, "grad_norm": 0.3211439251899719, "learning_rate": 3.0991678224687936e-06, "loss": 0.0042, "step": 9962 }, { "epoch": 6.901974367855906, "grad_norm": 0.25775355100631714, "learning_rate": 3.098474341192788e-06, "loss": 0.0038, "step": 9963 }, { "epoch": 6.902667128507101, "grad_norm": 0.30540579557418823, "learning_rate": 3.0977808599167827e-06, "loss": 0.0034, "step": 9964 }, { "epoch": 6.9033598891582955, "grad_norm": 0.39724627137184143, "learning_rate": 3.0970873786407767e-06, "loss": 0.0033, "step": 9965 }, { "epoch": 6.904052649809491, "grad_norm": 0.20717176795005798, "learning_rate": 3.0963938973647712e-06, "loss": 0.0047, "step": 9966 }, { "epoch": 6.904745410460686, "grad_norm": 0.25329551100730896, "learning_rate": 3.095700416088766e-06, "loss": 0.0053, "step": 9967 }, { "epoch": 6.90543817111188, "grad_norm": 0.5993894338607788, "learning_rate": 3.0950069348127603e-06, "loss": 0.0048, "step": 9968 }, { "epoch": 6.906130931763076, "grad_norm": 0.3660557270050049, "learning_rate": 3.0943134535367548e-06, "loss": 0.0051, "step": 9969 }, { "epoch": 6.906823692414271, "grad_norm": 0.1983492374420166, "learning_rate": 3.093619972260749e-06, "loss": 0.0033, "step": 9970 }, { "epoch": 6.907516453065466, "grad_norm": 0.1262977570295334, "learning_rate": 3.0929264909847434e-06, "loss": 0.0026, "step": 9971 }, { "epoch": 6.908209213716661, "grad_norm": 0.4371584951877594, "learning_rate": 3.0922330097087383e-06, "loss": 0.0037, "step": 9972 }, { "epoch": 6.908901974367856, "grad_norm": 0.19370682537555695, "learning_rate": 3.091539528432733e-06, "loss": 0.0032, "step": 9973 }, { "epoch": 6.909594735019051, "grad_norm": 0.3977658748626709, "learning_rate": 3.090846047156727e-06, "loss": 0.0043, "step": 9974 }, { "epoch": 6.910287495670246, "grad_norm": 0.2266015261411667, "learning_rate": 3.0901525658807214e-06, "loss": 0.0039, "step": 9975 }, { "epoch": 6.910980256321441, "grad_norm": 0.18956920504570007, "learning_rate": 3.0894590846047155e-06, "loss": 0.0031, "step": 9976 }, { "epoch": 6.911673016972636, "grad_norm": 0.2873472571372986, "learning_rate": 3.0887656033287104e-06, "loss": 0.0051, "step": 9977 }, { "epoch": 6.912365777623831, "grad_norm": 0.25905776023864746, "learning_rate": 3.088072122052705e-06, "loss": 0.0059, "step": 9978 }, { "epoch": 6.913058538275026, "grad_norm": 0.5050122141838074, "learning_rate": 3.0873786407766995e-06, "loss": 0.006, "step": 9979 }, { "epoch": 6.913751298926221, "grad_norm": 0.28577563166618347, "learning_rate": 3.0866851595006935e-06, "loss": 0.0055, "step": 9980 }, { "epoch": 6.914444059577416, "grad_norm": 0.20023463666439056, "learning_rate": 3.085991678224688e-06, "loss": 0.0044, "step": 9981 }, { "epoch": 6.915136820228611, "grad_norm": 0.30305594205856323, "learning_rate": 3.085298196948683e-06, "loss": 0.0031, "step": 9982 }, { "epoch": 6.915829580879806, "grad_norm": 0.19310308992862701, "learning_rate": 3.084604715672677e-06, "loss": 0.0028, "step": 9983 }, { "epoch": 6.916522341531001, "grad_norm": 0.2212955355644226, "learning_rate": 3.0839112343966716e-06, "loss": 0.0033, "step": 9984 }, { "epoch": 6.917215102182196, "grad_norm": 0.27291813492774963, "learning_rate": 3.0832177531206657e-06, "loss": 0.0038, "step": 9985 }, { "epoch": 6.917907862833391, "grad_norm": 0.2718471586704254, "learning_rate": 3.08252427184466e-06, "loss": 0.0034, "step": 9986 }, { "epoch": 6.918600623484586, "grad_norm": 0.2039763629436493, "learning_rate": 3.081830790568655e-06, "loss": 0.0031, "step": 9987 }, { "epoch": 6.919293384135781, "grad_norm": 0.12809832394123077, "learning_rate": 3.0811373092926496e-06, "loss": 0.0023, "step": 9988 }, { "epoch": 6.919986144786976, "grad_norm": 0.16423216462135315, "learning_rate": 3.0804438280166437e-06, "loss": 0.0029, "step": 9989 }, { "epoch": 6.920678905438171, "grad_norm": 0.31327763199806213, "learning_rate": 3.0797503467406382e-06, "loss": 0.0037, "step": 9990 }, { "epoch": 6.9213716660893665, "grad_norm": 0.23169377446174622, "learning_rate": 3.0790568654646323e-06, "loss": 0.0033, "step": 9991 }, { "epoch": 6.922064426740561, "grad_norm": 0.37328752875328064, "learning_rate": 3.0783633841886272e-06, "loss": 0.0065, "step": 9992 }, { "epoch": 6.922757187391756, "grad_norm": 0.2823079228401184, "learning_rate": 3.0776699029126217e-06, "loss": 0.0043, "step": 9993 }, { "epoch": 6.923449948042951, "grad_norm": 0.4732924997806549, "learning_rate": 3.076976421636616e-06, "loss": 0.0045, "step": 9994 }, { "epoch": 6.924142708694146, "grad_norm": 0.3883107602596283, "learning_rate": 3.0762829403606103e-06, "loss": 0.0053, "step": 9995 }, { "epoch": 6.924835469345341, "grad_norm": 0.241115540266037, "learning_rate": 3.075589459084605e-06, "loss": 0.004, "step": 9996 }, { "epoch": 6.925528229996536, "grad_norm": 0.31863003969192505, "learning_rate": 3.0748959778085998e-06, "loss": 0.0039, "step": 9997 }, { "epoch": 6.926220990647732, "grad_norm": 0.16336983442306519, "learning_rate": 3.074202496532594e-06, "loss": 0.0026, "step": 9998 }, { "epoch": 6.926913751298926, "grad_norm": 0.2944413423538208, "learning_rate": 3.0735090152565884e-06, "loss": 0.0033, "step": 9999 }, { "epoch": 6.927606511950121, "grad_norm": 0.17245644330978394, "learning_rate": 3.0728155339805825e-06, "loss": 0.0027, "step": 10000 }, { "epoch": 6.928299272601317, "grad_norm": 0.14909231662750244, "learning_rate": 3.072122052704577e-06, "loss": 0.0038, "step": 10001 }, { "epoch": 6.928992033252511, "grad_norm": 0.19677869975566864, "learning_rate": 3.071428571428572e-06, "loss": 0.0034, "step": 10002 }, { "epoch": 6.929684793903706, "grad_norm": 0.15342335402965546, "learning_rate": 3.0707350901525664e-06, "loss": 0.0027, "step": 10003 }, { "epoch": 6.930377554554902, "grad_norm": 0.5093405842781067, "learning_rate": 3.0700416088765605e-06, "loss": 0.0037, "step": 10004 }, { "epoch": 6.931070315206096, "grad_norm": 0.2522270679473877, "learning_rate": 3.069348127600555e-06, "loss": 0.0048, "step": 10005 }, { "epoch": 6.931763075857291, "grad_norm": 0.33458763360977173, "learning_rate": 3.068654646324549e-06, "loss": 0.004, "step": 10006 }, { "epoch": 6.9324558365084865, "grad_norm": 0.25092023611068726, "learning_rate": 3.067961165048544e-06, "loss": 0.0032, "step": 10007 }, { "epoch": 6.933148597159681, "grad_norm": 0.26223668456077576, "learning_rate": 3.0672676837725385e-06, "loss": 0.0048, "step": 10008 }, { "epoch": 6.933841357810876, "grad_norm": 0.23392276465892792, "learning_rate": 3.0665742024965326e-06, "loss": 0.0046, "step": 10009 }, { "epoch": 6.9345341184620715, "grad_norm": 0.17337779700756073, "learning_rate": 3.065880721220527e-06, "loss": 0.0028, "step": 10010 }, { "epoch": 6.935226879113267, "grad_norm": 0.18336065113544464, "learning_rate": 3.0651872399445217e-06, "loss": 0.003, "step": 10011 }, { "epoch": 6.935919639764461, "grad_norm": 0.311021625995636, "learning_rate": 3.0644937586685166e-06, "loss": 0.0046, "step": 10012 }, { "epoch": 6.936612400415656, "grad_norm": 0.35576215386390686, "learning_rate": 3.0638002773925107e-06, "loss": 0.0066, "step": 10013 }, { "epoch": 6.937305161066852, "grad_norm": 0.1976678967475891, "learning_rate": 3.063106796116505e-06, "loss": 0.0031, "step": 10014 }, { "epoch": 6.937997921718046, "grad_norm": 0.3793521523475647, "learning_rate": 3.0624133148404993e-06, "loss": 0.0042, "step": 10015 }, { "epoch": 6.938690682369241, "grad_norm": 0.3814072012901306, "learning_rate": 3.0617198335644938e-06, "loss": 0.004, "step": 10016 }, { "epoch": 6.939383443020437, "grad_norm": 0.20558927953243256, "learning_rate": 3.0610263522884887e-06, "loss": 0.0034, "step": 10017 }, { "epoch": 6.940076203671632, "grad_norm": 0.2317202389240265, "learning_rate": 3.060332871012483e-06, "loss": 0.0031, "step": 10018 }, { "epoch": 6.940768964322826, "grad_norm": 0.17047932744026184, "learning_rate": 3.0596393897364773e-06, "loss": 0.0031, "step": 10019 }, { "epoch": 6.941461724974022, "grad_norm": 0.43688803911209106, "learning_rate": 3.058945908460472e-06, "loss": 0.0046, "step": 10020 }, { "epoch": 6.942154485625217, "grad_norm": 0.1728978008031845, "learning_rate": 3.058252427184466e-06, "loss": 0.0031, "step": 10021 }, { "epoch": 6.942847246276411, "grad_norm": 0.264182984828949, "learning_rate": 3.057558945908461e-06, "loss": 0.0033, "step": 10022 }, { "epoch": 6.9435400069276065, "grad_norm": 0.17324721813201904, "learning_rate": 3.0568654646324553e-06, "loss": 0.0035, "step": 10023 }, { "epoch": 6.944232767578802, "grad_norm": 0.22715601325035095, "learning_rate": 3.0561719833564494e-06, "loss": 0.0038, "step": 10024 }, { "epoch": 6.944925528229996, "grad_norm": 0.2656223475933075, "learning_rate": 3.055478502080444e-06, "loss": 0.0043, "step": 10025 }, { "epoch": 6.9456182888811915, "grad_norm": 0.2931860089302063, "learning_rate": 3.0547850208044385e-06, "loss": 0.006, "step": 10026 }, { "epoch": 6.946311049532387, "grad_norm": 0.24384410679340363, "learning_rate": 3.0540915395284334e-06, "loss": 0.0046, "step": 10027 }, { "epoch": 6.947003810183581, "grad_norm": 0.3938216269016266, "learning_rate": 3.0533980582524275e-06, "loss": 0.0046, "step": 10028 }, { "epoch": 6.947696570834776, "grad_norm": 0.25126296281814575, "learning_rate": 3.052704576976422e-06, "loss": 0.0035, "step": 10029 }, { "epoch": 6.948389331485972, "grad_norm": 0.2991364896297455, "learning_rate": 3.052011095700416e-06, "loss": 0.0049, "step": 10030 }, { "epoch": 6.949082092137167, "grad_norm": 0.23555906116962433, "learning_rate": 3.0513176144244106e-06, "loss": 0.0028, "step": 10031 }, { "epoch": 6.949774852788361, "grad_norm": 0.1772664189338684, "learning_rate": 3.0506241331484055e-06, "loss": 0.0028, "step": 10032 }, { "epoch": 6.950467613439557, "grad_norm": 0.2503613829612732, "learning_rate": 3.0499306518723996e-06, "loss": 0.0029, "step": 10033 }, { "epoch": 6.951160374090752, "grad_norm": 0.22569991648197174, "learning_rate": 3.049237170596394e-06, "loss": 0.0026, "step": 10034 }, { "epoch": 6.951853134741946, "grad_norm": 0.2664303779602051, "learning_rate": 3.0485436893203886e-06, "loss": 0.0047, "step": 10035 }, { "epoch": 6.952545895393142, "grad_norm": 0.6259468197822571, "learning_rate": 3.0478502080443827e-06, "loss": 0.0033, "step": 10036 }, { "epoch": 6.953238656044337, "grad_norm": 0.22040881216526031, "learning_rate": 3.0471567267683776e-06, "loss": 0.004, "step": 10037 }, { "epoch": 6.953931416695532, "grad_norm": 0.1329197734594345, "learning_rate": 3.046463245492372e-06, "loss": 0.0026, "step": 10038 }, { "epoch": 6.9546241773467266, "grad_norm": 0.3064022362232208, "learning_rate": 3.0457697642163662e-06, "loss": 0.0032, "step": 10039 }, { "epoch": 6.955316937997922, "grad_norm": 0.34832891821861267, "learning_rate": 3.0450762829403607e-06, "loss": 0.0054, "step": 10040 }, { "epoch": 6.956009698649117, "grad_norm": 0.34846732020378113, "learning_rate": 3.0443828016643553e-06, "loss": 0.0051, "step": 10041 }, { "epoch": 6.9567024593003115, "grad_norm": 0.22778034210205078, "learning_rate": 3.0436893203883498e-06, "loss": 0.003, "step": 10042 }, { "epoch": 6.957395219951507, "grad_norm": 0.29621484875679016, "learning_rate": 3.0429958391123443e-06, "loss": 0.0039, "step": 10043 }, { "epoch": 6.958087980602702, "grad_norm": 0.2415006160736084, "learning_rate": 3.0423023578363388e-06, "loss": 0.0029, "step": 10044 }, { "epoch": 6.9587807412538965, "grad_norm": 0.3712511956691742, "learning_rate": 3.041608876560333e-06, "loss": 0.0023, "step": 10045 }, { "epoch": 6.959473501905092, "grad_norm": 0.25687456130981445, "learning_rate": 3.0409153952843274e-06, "loss": 0.0049, "step": 10046 }, { "epoch": 6.960166262556287, "grad_norm": 0.19633793830871582, "learning_rate": 3.0402219140083223e-06, "loss": 0.0037, "step": 10047 }, { "epoch": 6.960859023207481, "grad_norm": 0.31647029519081116, "learning_rate": 3.0395284327323164e-06, "loss": 0.0047, "step": 10048 }, { "epoch": 6.961551783858677, "grad_norm": 0.274926096200943, "learning_rate": 3.038834951456311e-06, "loss": 0.0051, "step": 10049 }, { "epoch": 6.962244544509872, "grad_norm": 0.23959867656230927, "learning_rate": 3.0381414701803054e-06, "loss": 0.0044, "step": 10050 }, { "epoch": 6.962937305161067, "grad_norm": 0.25957247614860535, "learning_rate": 3.0374479889042995e-06, "loss": 0.0039, "step": 10051 }, { "epoch": 6.963630065812262, "grad_norm": 0.23399588465690613, "learning_rate": 3.0367545076282944e-06, "loss": 0.0034, "step": 10052 }, { "epoch": 6.964322826463457, "grad_norm": 0.21195518970489502, "learning_rate": 3.036061026352289e-06, "loss": 0.0026, "step": 10053 }, { "epoch": 6.965015587114652, "grad_norm": 0.2954472005367279, "learning_rate": 3.035367545076283e-06, "loss": 0.0054, "step": 10054 }, { "epoch": 6.965708347765847, "grad_norm": 0.16996623575687408, "learning_rate": 3.0346740638002775e-06, "loss": 0.0028, "step": 10055 }, { "epoch": 6.966401108417042, "grad_norm": 0.3674592971801758, "learning_rate": 3.033980582524272e-06, "loss": 0.0032, "step": 10056 }, { "epoch": 6.967093869068237, "grad_norm": 0.16707442700862885, "learning_rate": 3.0332871012482666e-06, "loss": 0.0026, "step": 10057 }, { "epoch": 6.967786629719432, "grad_norm": 0.2636074125766754, "learning_rate": 3.032593619972261e-06, "loss": 0.0033, "step": 10058 }, { "epoch": 6.968479390370627, "grad_norm": 0.2006467580795288, "learning_rate": 3.0319001386962556e-06, "loss": 0.0031, "step": 10059 }, { "epoch": 6.969172151021822, "grad_norm": 0.27483102679252625, "learning_rate": 3.0312066574202497e-06, "loss": 0.0037, "step": 10060 }, { "epoch": 6.969864911673017, "grad_norm": 0.4790448546409607, "learning_rate": 3.030513176144244e-06, "loss": 0.0049, "step": 10061 }, { "epoch": 6.970557672324212, "grad_norm": 0.3278796672821045, "learning_rate": 3.029819694868239e-06, "loss": 0.0045, "step": 10062 }, { "epoch": 6.971250432975407, "grad_norm": 0.2741769254207611, "learning_rate": 3.029126213592233e-06, "loss": 0.0037, "step": 10063 }, { "epoch": 6.971943193626602, "grad_norm": 0.23450294137001038, "learning_rate": 3.0284327323162277e-06, "loss": 0.0036, "step": 10064 }, { "epoch": 6.972635954277797, "grad_norm": 0.5124578475952148, "learning_rate": 3.0277392510402222e-06, "loss": 0.0045, "step": 10065 }, { "epoch": 6.973328714928992, "grad_norm": 0.2673770487308502, "learning_rate": 3.0270457697642163e-06, "loss": 0.005, "step": 10066 }, { "epoch": 6.974021475580187, "grad_norm": 0.26082757115364075, "learning_rate": 3.0263522884882112e-06, "loss": 0.0046, "step": 10067 }, { "epoch": 6.974714236231382, "grad_norm": 0.4629848301410675, "learning_rate": 3.0256588072122058e-06, "loss": 0.0046, "step": 10068 }, { "epoch": 6.975406996882577, "grad_norm": 0.19269625842571259, "learning_rate": 3.0249653259362e-06, "loss": 0.0035, "step": 10069 }, { "epoch": 6.976099757533772, "grad_norm": 0.31474941968917847, "learning_rate": 3.0242718446601943e-06, "loss": 0.0041, "step": 10070 }, { "epoch": 6.976792518184967, "grad_norm": 0.29348450899124146, "learning_rate": 3.0235783633841884e-06, "loss": 0.0032, "step": 10071 }, { "epoch": 6.977485278836162, "grad_norm": 0.6254693269729614, "learning_rate": 3.0228848821081834e-06, "loss": 0.0039, "step": 10072 }, { "epoch": 6.978178039487357, "grad_norm": 0.1562761813402176, "learning_rate": 3.022191400832178e-06, "loss": 0.0024, "step": 10073 }, { "epoch": 6.978870800138552, "grad_norm": 0.14508678019046783, "learning_rate": 3.0214979195561724e-06, "loss": 0.0022, "step": 10074 }, { "epoch": 6.979563560789747, "grad_norm": 0.2147209197282791, "learning_rate": 3.0208044382801665e-06, "loss": 0.0025, "step": 10075 }, { "epoch": 6.980256321440942, "grad_norm": 0.37642332911491394, "learning_rate": 3.020110957004161e-06, "loss": 0.0052, "step": 10076 }, { "epoch": 6.980949082092137, "grad_norm": 0.13769502937793732, "learning_rate": 3.019417475728156e-06, "loss": 0.0028, "step": 10077 }, { "epoch": 6.981641842743333, "grad_norm": 0.26323574781417847, "learning_rate": 3.01872399445215e-06, "loss": 0.0033, "step": 10078 }, { "epoch": 6.982334603394527, "grad_norm": 0.19526053965091705, "learning_rate": 3.0180305131761445e-06, "loss": 0.0032, "step": 10079 }, { "epoch": 6.983027364045722, "grad_norm": 0.20103825628757477, "learning_rate": 3.017337031900139e-06, "loss": 0.003, "step": 10080 }, { "epoch": 6.983720124696918, "grad_norm": 0.18664149940013885, "learning_rate": 3.016643550624133e-06, "loss": 0.0033, "step": 10081 }, { "epoch": 6.984412885348112, "grad_norm": 0.4142363965511322, "learning_rate": 3.015950069348128e-06, "loss": 0.0049, "step": 10082 }, { "epoch": 6.985105645999307, "grad_norm": 0.27377110719680786, "learning_rate": 3.0152565880721226e-06, "loss": 0.0038, "step": 10083 }, { "epoch": 6.985798406650503, "grad_norm": 0.22474831342697144, "learning_rate": 3.0145631067961166e-06, "loss": 0.0035, "step": 10084 }, { "epoch": 6.986491167301697, "grad_norm": 0.19796189665794373, "learning_rate": 3.013869625520111e-06, "loss": 0.0034, "step": 10085 }, { "epoch": 6.987183927952892, "grad_norm": 0.181865856051445, "learning_rate": 3.0131761442441052e-06, "loss": 0.0026, "step": 10086 }, { "epoch": 6.9878766886040875, "grad_norm": 0.2278188169002533, "learning_rate": 3.0124826629681e-06, "loss": 0.0036, "step": 10087 }, { "epoch": 6.988569449255282, "grad_norm": 0.19749589264392853, "learning_rate": 3.0117891816920947e-06, "loss": 0.0043, "step": 10088 }, { "epoch": 6.989262209906477, "grad_norm": 0.1457054615020752, "learning_rate": 3.011095700416089e-06, "loss": 0.0033, "step": 10089 }, { "epoch": 6.9899549705576725, "grad_norm": 0.18413835763931274, "learning_rate": 3.0104022191400833e-06, "loss": 0.0031, "step": 10090 }, { "epoch": 6.990647731208867, "grad_norm": 0.3408403992652893, "learning_rate": 3.0097087378640778e-06, "loss": 0.0043, "step": 10091 }, { "epoch": 6.991340491860062, "grad_norm": 0.17260783910751343, "learning_rate": 3.0090152565880727e-06, "loss": 0.003, "step": 10092 }, { "epoch": 6.992033252511257, "grad_norm": 0.2064782828092575, "learning_rate": 3.008321775312067e-06, "loss": 0.0037, "step": 10093 }, { "epoch": 6.992726013162453, "grad_norm": 0.19827456772327423, "learning_rate": 3.0076282940360613e-06, "loss": 0.0028, "step": 10094 }, { "epoch": 6.993418773813647, "grad_norm": 0.22474165260791779, "learning_rate": 3.0069348127600554e-06, "loss": 0.0032, "step": 10095 }, { "epoch": 6.994111534464842, "grad_norm": 0.20684751868247986, "learning_rate": 3.00624133148405e-06, "loss": 0.0037, "step": 10096 }, { "epoch": 6.994804295116038, "grad_norm": 0.5600537061691284, "learning_rate": 3.005547850208045e-06, "loss": 0.0025, "step": 10097 }, { "epoch": 6.995497055767233, "grad_norm": 0.20565995573997498, "learning_rate": 3.0048543689320394e-06, "loss": 0.0035, "step": 10098 }, { "epoch": 6.996189816418427, "grad_norm": 0.2063204050064087, "learning_rate": 3.0041608876560334e-06, "loss": 0.0033, "step": 10099 }, { "epoch": 6.996882577069623, "grad_norm": 0.21847106516361237, "learning_rate": 3.003467406380028e-06, "loss": 0.0042, "step": 10100 }, { "epoch": 6.997575337720818, "grad_norm": 0.2257625311613083, "learning_rate": 3.002773925104022e-06, "loss": 0.0029, "step": 10101 }, { "epoch": 6.998268098372012, "grad_norm": 0.154760479927063, "learning_rate": 3.002080443828017e-06, "loss": 0.0027, "step": 10102 }, { "epoch": 6.9989608590232075, "grad_norm": 0.6939622163772583, "learning_rate": 3.0013869625520115e-06, "loss": 0.0047, "step": 10103 }, { "epoch": 6.999653619674403, "grad_norm": 0.17868445813655853, "learning_rate": 3.000693481276006e-06, "loss": 0.0028, "step": 10104 }, { "epoch": 6.999653619674403, "eval_loss": 0.3009836971759796, "eval_runtime": 7671.4969, "eval_samples_per_second": 1.043, "eval_steps_per_second": 0.033, "eval_wer": 12.362416576400294, "step": 10104 }, { "epoch": 7.000346380325597, "grad_norm": 0.2716224491596222, "learning_rate": 3e-06, "loss": 0.0053, "step": 10105 }, { "epoch": 7.0010391409767925, "grad_norm": 0.09159483015537262, "learning_rate": 2.9993065187239946e-06, "loss": 0.0019, "step": 10106 }, { "epoch": 7.001731901627988, "grad_norm": 0.5564009547233582, "learning_rate": 2.9986130374479895e-06, "loss": 0.0024, "step": 10107 }, { "epoch": 7.002424662279182, "grad_norm": 0.23817522823810577, "learning_rate": 2.9979195561719836e-06, "loss": 0.0026, "step": 10108 }, { "epoch": 7.003117422930377, "grad_norm": 0.11498336493968964, "learning_rate": 2.997226074895978e-06, "loss": 0.0018, "step": 10109 }, { "epoch": 7.003810183581573, "grad_norm": 0.28012868762016296, "learning_rate": 2.996532593619972e-06, "loss": 0.0046, "step": 10110 }, { "epoch": 7.004502944232768, "grad_norm": 0.31910011172294617, "learning_rate": 2.9958391123439667e-06, "loss": 0.0021, "step": 10111 }, { "epoch": 7.005195704883962, "grad_norm": 0.07397008687257767, "learning_rate": 2.9951456310679616e-06, "loss": 0.0018, "step": 10112 }, { "epoch": 7.005888465535158, "grad_norm": 0.12284370511770248, "learning_rate": 2.994452149791956e-06, "loss": 0.0022, "step": 10113 }, { "epoch": 7.006581226186353, "grad_norm": 0.11359403282403946, "learning_rate": 2.9937586685159502e-06, "loss": 0.0018, "step": 10114 }, { "epoch": 7.007273986837547, "grad_norm": 0.21793392300605774, "learning_rate": 2.9930651872399448e-06, "loss": 0.0024, "step": 10115 }, { "epoch": 7.007966747488743, "grad_norm": 0.1326545625925064, "learning_rate": 2.992371705963939e-06, "loss": 0.0027, "step": 10116 }, { "epoch": 7.008659508139938, "grad_norm": 0.24529285728931427, "learning_rate": 2.9916782246879338e-06, "loss": 0.0022, "step": 10117 }, { "epoch": 7.009352268791132, "grad_norm": 0.12091325968503952, "learning_rate": 2.9909847434119283e-06, "loss": 0.0019, "step": 10118 }, { "epoch": 7.0100450294423275, "grad_norm": 0.1797342598438263, "learning_rate": 2.9902912621359224e-06, "loss": 0.0023, "step": 10119 }, { "epoch": 7.010737790093523, "grad_norm": 0.18923820555210114, "learning_rate": 2.989597780859917e-06, "loss": 0.0022, "step": 10120 }, { "epoch": 7.011430550744718, "grad_norm": 0.7115009427070618, "learning_rate": 2.9889042995839114e-06, "loss": 0.0023, "step": 10121 }, { "epoch": 7.0121233113959125, "grad_norm": 0.10658278316259384, "learning_rate": 2.9882108183079063e-06, "loss": 0.002, "step": 10122 }, { "epoch": 7.012816072047108, "grad_norm": 0.1364043653011322, "learning_rate": 2.9875173370319004e-06, "loss": 0.0022, "step": 10123 }, { "epoch": 7.013508832698303, "grad_norm": 0.27905717492103577, "learning_rate": 2.986823855755895e-06, "loss": 0.003, "step": 10124 }, { "epoch": 7.014201593349497, "grad_norm": 0.11260130256414413, "learning_rate": 2.986130374479889e-06, "loss": 0.0019, "step": 10125 }, { "epoch": 7.014894354000693, "grad_norm": 0.09679900109767914, "learning_rate": 2.9854368932038835e-06, "loss": 0.0017, "step": 10126 }, { "epoch": 7.015587114651888, "grad_norm": 0.11848677694797516, "learning_rate": 2.9847434119278784e-06, "loss": 0.0021, "step": 10127 }, { "epoch": 7.016279875303082, "grad_norm": 0.2643411159515381, "learning_rate": 2.984049930651873e-06, "loss": 0.0024, "step": 10128 }, { "epoch": 7.016972635954278, "grad_norm": 0.14489522576332092, "learning_rate": 2.983356449375867e-06, "loss": 0.0026, "step": 10129 }, { "epoch": 7.017665396605473, "grad_norm": 0.149917334318161, "learning_rate": 2.9826629680998616e-06, "loss": 0.002, "step": 10130 }, { "epoch": 7.018358157256668, "grad_norm": 0.12082762271165848, "learning_rate": 2.9819694868238556e-06, "loss": 0.0027, "step": 10131 }, { "epoch": 7.019050917907863, "grad_norm": 0.15150338411331177, "learning_rate": 2.9812760055478506e-06, "loss": 0.0022, "step": 10132 }, { "epoch": 7.019743678559058, "grad_norm": 0.16063298285007477, "learning_rate": 2.980582524271845e-06, "loss": 0.0025, "step": 10133 }, { "epoch": 7.020436439210253, "grad_norm": 0.20068734884262085, "learning_rate": 2.979889042995839e-06, "loss": 0.0021, "step": 10134 }, { "epoch": 7.021129199861448, "grad_norm": 0.12279853969812393, "learning_rate": 2.9791955617198337e-06, "loss": 0.0026, "step": 10135 }, { "epoch": 7.021821960512643, "grad_norm": 0.12573403120040894, "learning_rate": 2.978502080443828e-06, "loss": 0.0022, "step": 10136 }, { "epoch": 7.022514721163838, "grad_norm": 0.23808933794498444, "learning_rate": 2.977808599167823e-06, "loss": 0.0025, "step": 10137 }, { "epoch": 7.0232074818150325, "grad_norm": 0.25830936431884766, "learning_rate": 2.977115117891817e-06, "loss": 0.0045, "step": 10138 }, { "epoch": 7.023900242466228, "grad_norm": 0.1041800007224083, "learning_rate": 2.9764216366158117e-06, "loss": 0.0017, "step": 10139 }, { "epoch": 7.024593003117423, "grad_norm": 0.1521730124950409, "learning_rate": 2.975728155339806e-06, "loss": 0.0027, "step": 10140 }, { "epoch": 7.025285763768618, "grad_norm": 0.19751764833927155, "learning_rate": 2.9750346740638003e-06, "loss": 0.0021, "step": 10141 }, { "epoch": 7.025978524419813, "grad_norm": 0.06638767570257187, "learning_rate": 2.9743411927877952e-06, "loss": 0.0017, "step": 10142 }, { "epoch": 7.026671285071008, "grad_norm": 0.2514634430408478, "learning_rate": 2.9736477115117898e-06, "loss": 0.0035, "step": 10143 }, { "epoch": 7.027364045722203, "grad_norm": 0.07468952983617783, "learning_rate": 2.972954230235784e-06, "loss": 0.0016, "step": 10144 }, { "epoch": 7.028056806373398, "grad_norm": 0.16920965909957886, "learning_rate": 2.9722607489597784e-06, "loss": 0.0019, "step": 10145 }, { "epoch": 7.028749567024593, "grad_norm": 0.11712231487035751, "learning_rate": 2.9715672676837724e-06, "loss": 0.0018, "step": 10146 }, { "epoch": 7.029442327675788, "grad_norm": 0.10406385362148285, "learning_rate": 2.9708737864077674e-06, "loss": 0.0027, "step": 10147 }, { "epoch": 7.030135088326983, "grad_norm": 0.2668299674987793, "learning_rate": 2.970180305131762e-06, "loss": 0.0025, "step": 10148 }, { "epoch": 7.030827848978178, "grad_norm": 0.13492488861083984, "learning_rate": 2.969486823855756e-06, "loss": 0.0018, "step": 10149 }, { "epoch": 7.031520609629373, "grad_norm": 0.10186266899108887, "learning_rate": 2.9687933425797505e-06, "loss": 0.0021, "step": 10150 }, { "epoch": 7.0322133702805685, "grad_norm": 0.2647084891796112, "learning_rate": 2.968099861303745e-06, "loss": 0.003, "step": 10151 }, { "epoch": 7.032906130931763, "grad_norm": 0.20142273604869843, "learning_rate": 2.96740638002774e-06, "loss": 0.0028, "step": 10152 }, { "epoch": 7.033598891582958, "grad_norm": 0.11634217202663422, "learning_rate": 2.966712898751734e-06, "loss": 0.0018, "step": 10153 }, { "epoch": 7.034291652234153, "grad_norm": 0.07488865405321121, "learning_rate": 2.9660194174757285e-06, "loss": 0.0015, "step": 10154 }, { "epoch": 7.034984412885348, "grad_norm": 0.11648157238960266, "learning_rate": 2.9653259361997226e-06, "loss": 0.0028, "step": 10155 }, { "epoch": 7.035677173536543, "grad_norm": 0.12555374205112457, "learning_rate": 2.964632454923717e-06, "loss": 0.0021, "step": 10156 }, { "epoch": 7.036369934187738, "grad_norm": 0.21055260300636292, "learning_rate": 2.963938973647712e-06, "loss": 0.0028, "step": 10157 }, { "epoch": 7.037062694838933, "grad_norm": 0.14861522614955902, "learning_rate": 2.963245492371706e-06, "loss": 0.0022, "step": 10158 }, { "epoch": 7.037755455490128, "grad_norm": 0.10379479825496674, "learning_rate": 2.9625520110957006e-06, "loss": 0.0017, "step": 10159 }, { "epoch": 7.038448216141323, "grad_norm": 0.10191231220960617, "learning_rate": 2.961858529819695e-06, "loss": 0.002, "step": 10160 }, { "epoch": 7.039140976792519, "grad_norm": 1.4535095691680908, "learning_rate": 2.9611650485436892e-06, "loss": 0.0025, "step": 10161 }, { "epoch": 7.039833737443713, "grad_norm": 0.10545312613248825, "learning_rate": 2.960471567267684e-06, "loss": 0.0019, "step": 10162 }, { "epoch": 7.040526498094908, "grad_norm": 0.15498687326908112, "learning_rate": 2.9597780859916787e-06, "loss": 0.0024, "step": 10163 }, { "epoch": 7.0412192587461035, "grad_norm": 0.13552707433700562, "learning_rate": 2.9590846047156728e-06, "loss": 0.0017, "step": 10164 }, { "epoch": 7.041912019397298, "grad_norm": 0.07262732833623886, "learning_rate": 2.9583911234396673e-06, "loss": 0.0017, "step": 10165 }, { "epoch": 7.042604780048493, "grad_norm": 0.1755642294883728, "learning_rate": 2.957697642163662e-06, "loss": 0.0018, "step": 10166 }, { "epoch": 7.0432975406996885, "grad_norm": 0.15847671031951904, "learning_rate": 2.9570041608876567e-06, "loss": 0.0021, "step": 10167 }, { "epoch": 7.043990301350883, "grad_norm": 0.17660565674304962, "learning_rate": 2.956310679611651e-06, "loss": 0.0027, "step": 10168 }, { "epoch": 7.044683062002078, "grad_norm": 0.06941859424114227, "learning_rate": 2.9556171983356453e-06, "loss": 0.0015, "step": 10169 }, { "epoch": 7.0453758226532734, "grad_norm": 0.15044160187244415, "learning_rate": 2.9549237170596394e-06, "loss": 0.0021, "step": 10170 }, { "epoch": 7.046068583304469, "grad_norm": 0.2087961882352829, "learning_rate": 2.954230235783634e-06, "loss": 0.0023, "step": 10171 }, { "epoch": 7.046761343955663, "grad_norm": 0.2598675787448883, "learning_rate": 2.953536754507629e-06, "loss": 0.0024, "step": 10172 }, { "epoch": 7.047454104606858, "grad_norm": 0.15336377918720245, "learning_rate": 2.952843273231623e-06, "loss": 0.0024, "step": 10173 }, { "epoch": 7.048146865258054, "grad_norm": 0.3325541615486145, "learning_rate": 2.9521497919556174e-06, "loss": 0.0038, "step": 10174 }, { "epoch": 7.048839625909248, "grad_norm": 0.1810804307460785, "learning_rate": 2.951456310679612e-06, "loss": 0.0021, "step": 10175 }, { "epoch": 7.049532386560443, "grad_norm": 0.2572703957557678, "learning_rate": 2.950762829403606e-06, "loss": 0.0034, "step": 10176 }, { "epoch": 7.050225147211639, "grad_norm": 0.09396033734083176, "learning_rate": 2.950069348127601e-06, "loss": 0.0015, "step": 10177 }, { "epoch": 7.050917907862833, "grad_norm": 0.3814314305782318, "learning_rate": 2.9493758668515955e-06, "loss": 0.002, "step": 10178 }, { "epoch": 7.051610668514028, "grad_norm": 0.14968882501125336, "learning_rate": 2.9486823855755896e-06, "loss": 0.0023, "step": 10179 }, { "epoch": 7.052303429165224, "grad_norm": 0.23168759047985077, "learning_rate": 2.947988904299584e-06, "loss": 0.0026, "step": 10180 }, { "epoch": 7.052996189816419, "grad_norm": 0.12254835665225983, "learning_rate": 2.9472954230235786e-06, "loss": 0.0017, "step": 10181 }, { "epoch": 7.053688950467613, "grad_norm": 0.11456603556871414, "learning_rate": 2.946601941747573e-06, "loss": 0.0016, "step": 10182 }, { "epoch": 7.0543817111188085, "grad_norm": 0.14420464634895325, "learning_rate": 2.9459084604715676e-06, "loss": 0.0017, "step": 10183 }, { "epoch": 7.055074471770004, "grad_norm": 0.10403777658939362, "learning_rate": 2.945214979195562e-06, "loss": 0.0017, "step": 10184 }, { "epoch": 7.055767232421198, "grad_norm": 0.1886509209871292, "learning_rate": 2.944521497919556e-06, "loss": 0.0026, "step": 10185 }, { "epoch": 7.0564599930723935, "grad_norm": 0.08650385588407516, "learning_rate": 2.9438280166435507e-06, "loss": 0.0017, "step": 10186 }, { "epoch": 7.057152753723589, "grad_norm": 0.09474591165781021, "learning_rate": 2.9431345353675457e-06, "loss": 0.0021, "step": 10187 }, { "epoch": 7.057845514374783, "grad_norm": 0.40305501222610474, "learning_rate": 2.9424410540915397e-06, "loss": 0.0029, "step": 10188 }, { "epoch": 7.058538275025978, "grad_norm": 0.11685768514871597, "learning_rate": 2.9417475728155342e-06, "loss": 0.0017, "step": 10189 }, { "epoch": 7.059231035677174, "grad_norm": 0.17035429179668427, "learning_rate": 2.9410540915395288e-06, "loss": 0.0019, "step": 10190 }, { "epoch": 7.059923796328369, "grad_norm": 0.10403960943222046, "learning_rate": 2.940360610263523e-06, "loss": 0.0019, "step": 10191 }, { "epoch": 7.060616556979563, "grad_norm": 0.08097000420093536, "learning_rate": 2.9396671289875178e-06, "loss": 0.0017, "step": 10192 }, { "epoch": 7.061309317630759, "grad_norm": 0.0710969939827919, "learning_rate": 2.9389736477115123e-06, "loss": 0.0014, "step": 10193 }, { "epoch": 7.062002078281954, "grad_norm": 0.13937436044216156, "learning_rate": 2.9382801664355064e-06, "loss": 0.002, "step": 10194 }, { "epoch": 7.062694838933148, "grad_norm": 0.2913510501384735, "learning_rate": 2.937586685159501e-06, "loss": 0.0036, "step": 10195 }, { "epoch": 7.063387599584344, "grad_norm": 0.11702926456928253, "learning_rate": 2.9368932038834954e-06, "loss": 0.0018, "step": 10196 }, { "epoch": 7.064080360235539, "grad_norm": 0.17788025736808777, "learning_rate": 2.93619972260749e-06, "loss": 0.0037, "step": 10197 }, { "epoch": 7.064773120886733, "grad_norm": 0.11324986815452576, "learning_rate": 2.9355062413314844e-06, "loss": 0.0017, "step": 10198 }, { "epoch": 7.0654658815379285, "grad_norm": 0.22492696344852448, "learning_rate": 2.934812760055479e-06, "loss": 0.0023, "step": 10199 }, { "epoch": 7.066158642189124, "grad_norm": 0.1243765652179718, "learning_rate": 2.934119278779473e-06, "loss": 0.0018, "step": 10200 }, { "epoch": 7.066851402840319, "grad_norm": 0.09988114982843399, "learning_rate": 2.9334257975034675e-06, "loss": 0.0017, "step": 10201 }, { "epoch": 7.0675441634915135, "grad_norm": 0.13032008707523346, "learning_rate": 2.9327323162274625e-06, "loss": 0.0019, "step": 10202 }, { "epoch": 7.068236924142709, "grad_norm": 0.10445775091648102, "learning_rate": 2.9320388349514565e-06, "loss": 0.0017, "step": 10203 }, { "epoch": 7.068929684793904, "grad_norm": 0.12967832386493683, "learning_rate": 2.931345353675451e-06, "loss": 0.0017, "step": 10204 }, { "epoch": 7.069622445445098, "grad_norm": 0.4911452829837799, "learning_rate": 2.9306518723994456e-06, "loss": 0.002, "step": 10205 }, { "epoch": 7.070315206096294, "grad_norm": 0.10737442970275879, "learning_rate": 2.9299583911234396e-06, "loss": 0.0016, "step": 10206 }, { "epoch": 7.071007966747489, "grad_norm": 0.07060238718986511, "learning_rate": 2.9292649098474346e-06, "loss": 0.0016, "step": 10207 }, { "epoch": 7.071700727398683, "grad_norm": 0.10306859761476517, "learning_rate": 2.928571428571429e-06, "loss": 0.0015, "step": 10208 }, { "epoch": 7.072393488049879, "grad_norm": 0.12221773713827133, "learning_rate": 2.927877947295423e-06, "loss": 0.002, "step": 10209 }, { "epoch": 7.073086248701074, "grad_norm": 0.12454909831285477, "learning_rate": 2.9271844660194177e-06, "loss": 0.0021, "step": 10210 }, { "epoch": 7.073779009352269, "grad_norm": 0.24364261329174042, "learning_rate": 2.9264909847434118e-06, "loss": 0.0029, "step": 10211 }, { "epoch": 7.074471770003464, "grad_norm": 0.1029602512717247, "learning_rate": 2.9257975034674067e-06, "loss": 0.0019, "step": 10212 }, { "epoch": 7.075164530654659, "grad_norm": 0.1706295758485794, "learning_rate": 2.9251040221914012e-06, "loss": 0.002, "step": 10213 }, { "epoch": 7.075857291305854, "grad_norm": 0.07938599586486816, "learning_rate": 2.9244105409153957e-06, "loss": 0.0016, "step": 10214 }, { "epoch": 7.076550051957049, "grad_norm": 0.09258869290351868, "learning_rate": 2.92371705963939e-06, "loss": 0.0015, "step": 10215 }, { "epoch": 7.077242812608244, "grad_norm": 0.14611390233039856, "learning_rate": 2.9230235783633843e-06, "loss": 0.0018, "step": 10216 }, { "epoch": 7.077935573259439, "grad_norm": 0.1732112616300583, "learning_rate": 2.9223300970873793e-06, "loss": 0.0029, "step": 10217 }, { "epoch": 7.0786283339106335, "grad_norm": 0.11785987764596939, "learning_rate": 2.9216366158113733e-06, "loss": 0.0022, "step": 10218 }, { "epoch": 7.079321094561829, "grad_norm": 0.13899289071559906, "learning_rate": 2.920943134535368e-06, "loss": 0.0019, "step": 10219 }, { "epoch": 7.080013855213024, "grad_norm": 0.10356828570365906, "learning_rate": 2.9202496532593624e-06, "loss": 0.0018, "step": 10220 }, { "epoch": 7.080706615864219, "grad_norm": 0.1071179062128067, "learning_rate": 2.9195561719833564e-06, "loss": 0.0024, "step": 10221 }, { "epoch": 7.081399376515414, "grad_norm": 0.1809478998184204, "learning_rate": 2.9188626907073514e-06, "loss": 0.002, "step": 10222 }, { "epoch": 7.082092137166609, "grad_norm": 0.21219773590564728, "learning_rate": 2.918169209431346e-06, "loss": 0.0021, "step": 10223 }, { "epoch": 7.082784897817804, "grad_norm": 0.1390245407819748, "learning_rate": 2.91747572815534e-06, "loss": 0.0019, "step": 10224 }, { "epoch": 7.083477658468999, "grad_norm": 0.18222716450691223, "learning_rate": 2.9167822468793345e-06, "loss": 0.0018, "step": 10225 }, { "epoch": 7.084170419120194, "grad_norm": 0.13278789818286896, "learning_rate": 2.9160887656033286e-06, "loss": 0.0021, "step": 10226 }, { "epoch": 7.084863179771389, "grad_norm": 0.3092099726200104, "learning_rate": 2.9153952843273235e-06, "loss": 0.0025, "step": 10227 }, { "epoch": 7.085555940422584, "grad_norm": 0.08629997819662094, "learning_rate": 2.914701803051318e-06, "loss": 0.0018, "step": 10228 }, { "epoch": 7.086248701073779, "grad_norm": 0.10827749222517014, "learning_rate": 2.9140083217753125e-06, "loss": 0.0019, "step": 10229 }, { "epoch": 7.086941461724974, "grad_norm": 0.15044258534908295, "learning_rate": 2.9133148404993066e-06, "loss": 0.0018, "step": 10230 }, { "epoch": 7.0876342223761695, "grad_norm": 0.10118412226438522, "learning_rate": 2.912621359223301e-06, "loss": 0.002, "step": 10231 }, { "epoch": 7.088326983027364, "grad_norm": 0.18661916255950928, "learning_rate": 2.911927877947296e-06, "loss": 0.0033, "step": 10232 }, { "epoch": 7.089019743678559, "grad_norm": 0.1472817361354828, "learning_rate": 2.91123439667129e-06, "loss": 0.002, "step": 10233 }, { "epoch": 7.089712504329754, "grad_norm": 0.31590205430984497, "learning_rate": 2.9105409153952847e-06, "loss": 0.0023, "step": 10234 }, { "epoch": 7.090405264980949, "grad_norm": 0.3203689754009247, "learning_rate": 2.9098474341192787e-06, "loss": 0.0039, "step": 10235 }, { "epoch": 7.091098025632144, "grad_norm": 0.141969233751297, "learning_rate": 2.9091539528432732e-06, "loss": 0.0017, "step": 10236 }, { "epoch": 7.091790786283339, "grad_norm": 0.10086339712142944, "learning_rate": 2.908460471567268e-06, "loss": 0.0015, "step": 10237 }, { "epoch": 7.092483546934534, "grad_norm": 0.08305101096630096, "learning_rate": 2.9077669902912627e-06, "loss": 0.0016, "step": 10238 }, { "epoch": 7.093176307585729, "grad_norm": 0.1501968502998352, "learning_rate": 2.9070735090152568e-06, "loss": 0.0017, "step": 10239 }, { "epoch": 7.093869068236924, "grad_norm": 0.12165763229131699, "learning_rate": 2.9063800277392513e-06, "loss": 0.0019, "step": 10240 }, { "epoch": 7.09456182888812, "grad_norm": 0.2288498729467392, "learning_rate": 2.9056865464632454e-06, "loss": 0.0021, "step": 10241 }, { "epoch": 7.095254589539314, "grad_norm": 0.1075778380036354, "learning_rate": 2.9049930651872403e-06, "loss": 0.0017, "step": 10242 }, { "epoch": 7.095947350190509, "grad_norm": 0.17690244317054749, "learning_rate": 2.904299583911235e-06, "loss": 0.0021, "step": 10243 }, { "epoch": 7.0966401108417045, "grad_norm": 0.15283559262752533, "learning_rate": 2.9036061026352293e-06, "loss": 0.0023, "step": 10244 }, { "epoch": 7.097332871492899, "grad_norm": 0.07930505275726318, "learning_rate": 2.9029126213592234e-06, "loss": 0.0015, "step": 10245 }, { "epoch": 7.098025632144094, "grad_norm": 0.1359519362449646, "learning_rate": 2.902219140083218e-06, "loss": 0.0018, "step": 10246 }, { "epoch": 7.0987183927952895, "grad_norm": 0.1672157496213913, "learning_rate": 2.901525658807213e-06, "loss": 0.0024, "step": 10247 }, { "epoch": 7.099411153446484, "grad_norm": 0.1201040968298912, "learning_rate": 2.900832177531207e-06, "loss": 0.0015, "step": 10248 }, { "epoch": 7.100103914097679, "grad_norm": 0.11735248565673828, "learning_rate": 2.9001386962552015e-06, "loss": 0.0017, "step": 10249 }, { "epoch": 7.100796674748874, "grad_norm": 0.08093228191137314, "learning_rate": 2.8994452149791955e-06, "loss": 0.0016, "step": 10250 }, { "epoch": 7.101489435400069, "grad_norm": 0.13805758953094482, "learning_rate": 2.89875173370319e-06, "loss": 0.0028, "step": 10251 }, { "epoch": 7.102182196051264, "grad_norm": 0.10857182741165161, "learning_rate": 2.898058252427185e-06, "loss": 0.0019, "step": 10252 }, { "epoch": 7.102874956702459, "grad_norm": 0.13954514265060425, "learning_rate": 2.8973647711511795e-06, "loss": 0.0018, "step": 10253 }, { "epoch": 7.103567717353655, "grad_norm": 0.10731736570596695, "learning_rate": 2.8966712898751736e-06, "loss": 0.0015, "step": 10254 }, { "epoch": 7.104260478004849, "grad_norm": 0.12175274640321732, "learning_rate": 2.895977808599168e-06, "loss": 0.0028, "step": 10255 }, { "epoch": 7.104953238656044, "grad_norm": 0.10490991920232773, "learning_rate": 2.895284327323162e-06, "loss": 0.0014, "step": 10256 }, { "epoch": 7.10564599930724, "grad_norm": 0.4596697688102722, "learning_rate": 2.894590846047157e-06, "loss": 0.0026, "step": 10257 }, { "epoch": 7.106338759958434, "grad_norm": 0.07938335090875626, "learning_rate": 2.8938973647711516e-06, "loss": 0.0016, "step": 10258 }, { "epoch": 7.107031520609629, "grad_norm": 0.08749832957983017, "learning_rate": 2.8932038834951457e-06, "loss": 0.0016, "step": 10259 }, { "epoch": 7.107724281260825, "grad_norm": 0.09177283942699432, "learning_rate": 2.8925104022191402e-06, "loss": 0.0015, "step": 10260 }, { "epoch": 7.10841704191202, "grad_norm": 0.06884568929672241, "learning_rate": 2.8918169209431347e-06, "loss": 0.0014, "step": 10261 }, { "epoch": 7.109109802563214, "grad_norm": 0.16840292513370514, "learning_rate": 2.8911234396671297e-06, "loss": 0.0023, "step": 10262 }, { "epoch": 7.1098025632144095, "grad_norm": 0.20110274851322174, "learning_rate": 2.8904299583911237e-06, "loss": 0.0016, "step": 10263 }, { "epoch": 7.110495323865605, "grad_norm": 0.1194235309958458, "learning_rate": 2.8897364771151183e-06, "loss": 0.0018, "step": 10264 }, { "epoch": 7.111188084516799, "grad_norm": 0.1636734902858734, "learning_rate": 2.8890429958391123e-06, "loss": 0.0019, "step": 10265 }, { "epoch": 7.1118808451679945, "grad_norm": 0.7341674566268921, "learning_rate": 2.888349514563107e-06, "loss": 0.0018, "step": 10266 }, { "epoch": 7.11257360581919, "grad_norm": 0.12724405527114868, "learning_rate": 2.8876560332871018e-06, "loss": 0.0017, "step": 10267 }, { "epoch": 7.113266366470384, "grad_norm": 0.5825866460800171, "learning_rate": 2.8869625520110963e-06, "loss": 0.0026, "step": 10268 }, { "epoch": 7.113959127121579, "grad_norm": 0.1154637336730957, "learning_rate": 2.8862690707350904e-06, "loss": 0.0016, "step": 10269 }, { "epoch": 7.114651887772775, "grad_norm": 0.05893559381365776, "learning_rate": 2.885575589459085e-06, "loss": 0.0014, "step": 10270 }, { "epoch": 7.115344648423969, "grad_norm": 0.1777360886335373, "learning_rate": 2.884882108183079e-06, "loss": 0.0019, "step": 10271 }, { "epoch": 7.116037409075164, "grad_norm": 0.4561309516429901, "learning_rate": 2.884188626907074e-06, "loss": 0.0022, "step": 10272 }, { "epoch": 7.11673016972636, "grad_norm": 0.10777769982814789, "learning_rate": 2.8834951456310684e-06, "loss": 0.0016, "step": 10273 }, { "epoch": 7.117422930377555, "grad_norm": 0.06441865861415863, "learning_rate": 2.8828016643550625e-06, "loss": 0.0013, "step": 10274 }, { "epoch": 7.118115691028749, "grad_norm": 0.23009923100471497, "learning_rate": 2.882108183079057e-06, "loss": 0.0024, "step": 10275 }, { "epoch": 7.118808451679945, "grad_norm": 0.17869342863559723, "learning_rate": 2.8814147018030515e-06, "loss": 0.0024, "step": 10276 }, { "epoch": 7.11950121233114, "grad_norm": 0.1781490296125412, "learning_rate": 2.8807212205270465e-06, "loss": 0.0016, "step": 10277 }, { "epoch": 7.120193972982334, "grad_norm": 0.12733760476112366, "learning_rate": 2.8800277392510405e-06, "loss": 0.0018, "step": 10278 }, { "epoch": 7.1208867336335295, "grad_norm": 0.38746392726898193, "learning_rate": 2.879334257975035e-06, "loss": 0.0037, "step": 10279 }, { "epoch": 7.121579494284725, "grad_norm": 0.12233682721853256, "learning_rate": 2.878640776699029e-06, "loss": 0.0017, "step": 10280 }, { "epoch": 7.12227225493592, "grad_norm": 0.1713201105594635, "learning_rate": 2.8779472954230237e-06, "loss": 0.0034, "step": 10281 }, { "epoch": 7.1229650155871145, "grad_norm": 0.13770155608654022, "learning_rate": 2.8772538141470186e-06, "loss": 0.0025, "step": 10282 }, { "epoch": 7.12365777623831, "grad_norm": 0.08295518159866333, "learning_rate": 2.8765603328710127e-06, "loss": 0.0017, "step": 10283 }, { "epoch": 7.124350536889505, "grad_norm": 0.3251071274280548, "learning_rate": 2.875866851595007e-06, "loss": 0.002, "step": 10284 }, { "epoch": 7.125043297540699, "grad_norm": 0.17440323531627655, "learning_rate": 2.8751733703190017e-06, "loss": 0.0031, "step": 10285 }, { "epoch": 7.125736058191895, "grad_norm": 0.395295649766922, "learning_rate": 2.8744798890429958e-06, "loss": 0.003, "step": 10286 }, { "epoch": 7.12642881884309, "grad_norm": 0.09597323834896088, "learning_rate": 2.8737864077669903e-06, "loss": 0.0016, "step": 10287 }, { "epoch": 7.127121579494284, "grad_norm": 0.19693192839622498, "learning_rate": 2.8730929264909852e-06, "loss": 0.002, "step": 10288 }, { "epoch": 7.12781434014548, "grad_norm": 0.09158217161893845, "learning_rate": 2.8723994452149793e-06, "loss": 0.0019, "step": 10289 }, { "epoch": 7.128507100796675, "grad_norm": 0.10716332495212555, "learning_rate": 2.871705963938974e-06, "loss": 0.002, "step": 10290 }, { "epoch": 7.129199861447869, "grad_norm": 0.15890870988368988, "learning_rate": 2.8710124826629683e-06, "loss": 0.0025, "step": 10291 }, { "epoch": 7.129892622099065, "grad_norm": 0.11464457213878632, "learning_rate": 2.8703190013869624e-06, "loss": 0.0018, "step": 10292 }, { "epoch": 7.13058538275026, "grad_norm": 0.41781777143478394, "learning_rate": 2.8696255201109573e-06, "loss": 0.0027, "step": 10293 }, { "epoch": 7.131278143401455, "grad_norm": 0.07067586481571198, "learning_rate": 2.868932038834952e-06, "loss": 0.0015, "step": 10294 }, { "epoch": 7.1319709040526496, "grad_norm": 0.1502179205417633, "learning_rate": 2.868238557558946e-06, "loss": 0.0018, "step": 10295 }, { "epoch": 7.132663664703845, "grad_norm": 0.11330417543649673, "learning_rate": 2.8675450762829405e-06, "loss": 0.0023, "step": 10296 }, { "epoch": 7.13335642535504, "grad_norm": 0.09089425951242447, "learning_rate": 2.866851595006935e-06, "loss": 0.0017, "step": 10297 }, { "epoch": 7.1340491860062345, "grad_norm": 0.13021895289421082, "learning_rate": 2.8661581137309295e-06, "loss": 0.002, "step": 10298 }, { "epoch": 7.13474194665743, "grad_norm": 0.2148451954126358, "learning_rate": 2.865464632454924e-06, "loss": 0.0021, "step": 10299 }, { "epoch": 7.135434707308625, "grad_norm": 0.2965351343154907, "learning_rate": 2.8647711511789185e-06, "loss": 0.0021, "step": 10300 }, { "epoch": 7.13612746795982, "grad_norm": 0.13132959604263306, "learning_rate": 2.8640776699029126e-06, "loss": 0.0021, "step": 10301 }, { "epoch": 7.136820228611015, "grad_norm": 0.1508658528327942, "learning_rate": 2.863384188626907e-06, "loss": 0.0016, "step": 10302 }, { "epoch": 7.13751298926221, "grad_norm": 0.19183248281478882, "learning_rate": 2.862690707350902e-06, "loss": 0.0016, "step": 10303 }, { "epoch": 7.138205749913405, "grad_norm": 0.07367881387472153, "learning_rate": 2.861997226074896e-06, "loss": 0.0016, "step": 10304 }, { "epoch": 7.1388985105646, "grad_norm": 0.33188414573669434, "learning_rate": 2.8613037447988906e-06, "loss": 0.0019, "step": 10305 }, { "epoch": 7.139591271215795, "grad_norm": 0.23595505952835083, "learning_rate": 2.860610263522885e-06, "loss": 0.0023, "step": 10306 }, { "epoch": 7.14028403186699, "grad_norm": 0.1040181890130043, "learning_rate": 2.8599167822468792e-06, "loss": 0.0017, "step": 10307 }, { "epoch": 7.140976792518185, "grad_norm": 0.13215972483158112, "learning_rate": 2.859223300970874e-06, "loss": 0.0024, "step": 10308 }, { "epoch": 7.14166955316938, "grad_norm": 0.11046463996171951, "learning_rate": 2.8585298196948687e-06, "loss": 0.0018, "step": 10309 }, { "epoch": 7.142362313820575, "grad_norm": 0.2333577275276184, "learning_rate": 2.8578363384188627e-06, "loss": 0.0025, "step": 10310 }, { "epoch": 7.14305507447177, "grad_norm": 0.18429487943649292, "learning_rate": 2.8571428571428573e-06, "loss": 0.0028, "step": 10311 }, { "epoch": 7.143747835122965, "grad_norm": 0.12696926295757294, "learning_rate": 2.8564493758668518e-06, "loss": 0.0017, "step": 10312 }, { "epoch": 7.14444059577416, "grad_norm": 0.1349647343158722, "learning_rate": 2.8557558945908463e-06, "loss": 0.0021, "step": 10313 }, { "epoch": 7.145133356425355, "grad_norm": 0.1701875627040863, "learning_rate": 2.8550624133148408e-06, "loss": 0.002, "step": 10314 }, { "epoch": 7.14582611707655, "grad_norm": 0.1400025486946106, "learning_rate": 2.8543689320388353e-06, "loss": 0.0024, "step": 10315 }, { "epoch": 7.146518877727745, "grad_norm": 0.19550296664237976, "learning_rate": 2.8536754507628294e-06, "loss": 0.002, "step": 10316 }, { "epoch": 7.14721163837894, "grad_norm": 0.2161799818277359, "learning_rate": 2.852981969486824e-06, "loss": 0.0024, "step": 10317 }, { "epoch": 7.147904399030135, "grad_norm": 0.06708332896232605, "learning_rate": 2.852288488210819e-06, "loss": 0.0016, "step": 10318 }, { "epoch": 7.14859715968133, "grad_norm": 0.11973574757575989, "learning_rate": 2.851595006934813e-06, "loss": 0.0025, "step": 10319 }, { "epoch": 7.149289920332525, "grad_norm": 0.08611651510000229, "learning_rate": 2.8509015256588074e-06, "loss": 0.0016, "step": 10320 }, { "epoch": 7.14998268098372, "grad_norm": 0.13845081627368927, "learning_rate": 2.850208044382802e-06, "loss": 0.0019, "step": 10321 }, { "epoch": 7.150675441634915, "grad_norm": 0.06957918405532837, "learning_rate": 2.849514563106796e-06, "loss": 0.0017, "step": 10322 }, { "epoch": 7.15136820228611, "grad_norm": 0.10618674755096436, "learning_rate": 2.848821081830791e-06, "loss": 0.0021, "step": 10323 }, { "epoch": 7.1520609629373055, "grad_norm": 0.4333973824977875, "learning_rate": 2.8481276005547855e-06, "loss": 0.004, "step": 10324 }, { "epoch": 7.1527537235885, "grad_norm": 0.08355173468589783, "learning_rate": 2.8474341192787795e-06, "loss": 0.0016, "step": 10325 }, { "epoch": 7.153446484239695, "grad_norm": 0.07392586767673492, "learning_rate": 2.846740638002774e-06, "loss": 0.0015, "step": 10326 }, { "epoch": 7.1541392448908905, "grad_norm": 0.13416960835456848, "learning_rate": 2.846047156726768e-06, "loss": 0.0016, "step": 10327 }, { "epoch": 7.154832005542085, "grad_norm": 0.1664823740720749, "learning_rate": 2.845353675450763e-06, "loss": 0.0019, "step": 10328 }, { "epoch": 7.15552476619328, "grad_norm": 0.07663372159004211, "learning_rate": 2.8446601941747576e-06, "loss": 0.0015, "step": 10329 }, { "epoch": 7.156217526844475, "grad_norm": 0.27967923879623413, "learning_rate": 2.843966712898752e-06, "loss": 0.002, "step": 10330 }, { "epoch": 7.15691028749567, "grad_norm": 0.15132340788841248, "learning_rate": 2.843273231622746e-06, "loss": 0.0018, "step": 10331 }, { "epoch": 7.157603048146865, "grad_norm": 0.114525206387043, "learning_rate": 2.8425797503467407e-06, "loss": 0.0016, "step": 10332 }, { "epoch": 7.15829580879806, "grad_norm": 0.07676137238740921, "learning_rate": 2.8418862690707356e-06, "loss": 0.0016, "step": 10333 }, { "epoch": 7.158988569449256, "grad_norm": 0.1388207972049713, "learning_rate": 2.8411927877947297e-06, "loss": 0.002, "step": 10334 }, { "epoch": 7.15968133010045, "grad_norm": 0.33709225058555603, "learning_rate": 2.8404993065187242e-06, "loss": 0.0031, "step": 10335 }, { "epoch": 7.160374090751645, "grad_norm": 0.1640513390302658, "learning_rate": 2.8398058252427187e-06, "loss": 0.0024, "step": 10336 }, { "epoch": 7.161066851402841, "grad_norm": 0.13665960729122162, "learning_rate": 2.839112343966713e-06, "loss": 0.0018, "step": 10337 }, { "epoch": 7.161759612054035, "grad_norm": 0.09487101435661316, "learning_rate": 2.8384188626907078e-06, "loss": 0.0017, "step": 10338 }, { "epoch": 7.16245237270523, "grad_norm": 0.12235639989376068, "learning_rate": 2.8377253814147023e-06, "loss": 0.0016, "step": 10339 }, { "epoch": 7.1631451333564256, "grad_norm": 0.14719174802303314, "learning_rate": 2.8370319001386963e-06, "loss": 0.0019, "step": 10340 }, { "epoch": 7.16383789400762, "grad_norm": 0.15159761905670166, "learning_rate": 2.836338418862691e-06, "loss": 0.0018, "step": 10341 }, { "epoch": 7.164530654658815, "grad_norm": 0.08222801238298416, "learning_rate": 2.835644937586685e-06, "loss": 0.0016, "step": 10342 }, { "epoch": 7.1652234153100105, "grad_norm": 0.08563582599163055, "learning_rate": 2.83495145631068e-06, "loss": 0.0014, "step": 10343 }, { "epoch": 7.165916175961206, "grad_norm": 0.24724319577217102, "learning_rate": 2.8342579750346744e-06, "loss": 0.0059, "step": 10344 }, { "epoch": 7.1666089366124, "grad_norm": 0.2744807004928589, "learning_rate": 2.833564493758669e-06, "loss": 0.0035, "step": 10345 }, { "epoch": 7.1673016972635955, "grad_norm": 0.17966726422309875, "learning_rate": 2.832871012482663e-06, "loss": 0.0034, "step": 10346 }, { "epoch": 7.167994457914791, "grad_norm": 0.08111991733312607, "learning_rate": 2.8321775312066575e-06, "loss": 0.0013, "step": 10347 }, { "epoch": 7.168687218565985, "grad_norm": 0.16162936389446259, "learning_rate": 2.8314840499306524e-06, "loss": 0.0021, "step": 10348 }, { "epoch": 7.16937997921718, "grad_norm": 0.10621341317892075, "learning_rate": 2.8307905686546465e-06, "loss": 0.0014, "step": 10349 }, { "epoch": 7.170072739868376, "grad_norm": 0.08825115859508514, "learning_rate": 2.830097087378641e-06, "loss": 0.0015, "step": 10350 }, { "epoch": 7.17076550051957, "grad_norm": 0.06684909760951996, "learning_rate": 2.829403606102635e-06, "loss": 0.0015, "step": 10351 }, { "epoch": 7.171458261170765, "grad_norm": 0.19890569150447845, "learning_rate": 2.8287101248266296e-06, "loss": 0.0023, "step": 10352 }, { "epoch": 7.172151021821961, "grad_norm": 0.17000828683376312, "learning_rate": 2.8280166435506246e-06, "loss": 0.002, "step": 10353 }, { "epoch": 7.172843782473156, "grad_norm": 0.11930099129676819, "learning_rate": 2.827323162274619e-06, "loss": 0.0021, "step": 10354 }, { "epoch": 7.17353654312435, "grad_norm": 0.05308936536312103, "learning_rate": 2.826629680998613e-06, "loss": 0.0013, "step": 10355 }, { "epoch": 7.174229303775546, "grad_norm": 0.06604034453630447, "learning_rate": 2.8259361997226077e-06, "loss": 0.0015, "step": 10356 }, { "epoch": 7.174922064426741, "grad_norm": 0.13203255832195282, "learning_rate": 2.8252427184466017e-06, "loss": 0.0016, "step": 10357 }, { "epoch": 7.175614825077935, "grad_norm": 0.1402018964290619, "learning_rate": 2.8245492371705967e-06, "loss": 0.0026, "step": 10358 }, { "epoch": 7.1763075857291305, "grad_norm": 0.15407191216945648, "learning_rate": 2.823855755894591e-06, "loss": 0.0014, "step": 10359 }, { "epoch": 7.177000346380326, "grad_norm": 0.12346000224351883, "learning_rate": 2.8231622746185857e-06, "loss": 0.0024, "step": 10360 }, { "epoch": 7.17769310703152, "grad_norm": 0.0611688531935215, "learning_rate": 2.8224687933425798e-06, "loss": 0.0015, "step": 10361 }, { "epoch": 7.1783858676827155, "grad_norm": 0.05798180028796196, "learning_rate": 2.8217753120665743e-06, "loss": 0.0015, "step": 10362 }, { "epoch": 7.179078628333911, "grad_norm": 0.09550177305936813, "learning_rate": 2.8210818307905692e-06, "loss": 0.0016, "step": 10363 }, { "epoch": 7.179771388985106, "grad_norm": 0.05496978014707565, "learning_rate": 2.8203883495145633e-06, "loss": 0.0013, "step": 10364 }, { "epoch": 7.1804641496363, "grad_norm": 0.07199720293283463, "learning_rate": 2.819694868238558e-06, "loss": 0.0014, "step": 10365 }, { "epoch": 7.181156910287496, "grad_norm": 0.2144985795021057, "learning_rate": 2.819001386962552e-06, "loss": 0.003, "step": 10366 }, { "epoch": 7.181849670938691, "grad_norm": 0.1355004459619522, "learning_rate": 2.8183079056865464e-06, "loss": 0.0017, "step": 10367 }, { "epoch": 7.182542431589885, "grad_norm": 0.08780473470687866, "learning_rate": 2.8176144244105414e-06, "loss": 0.0022, "step": 10368 }, { "epoch": 7.183235192241081, "grad_norm": 0.1346195638179779, "learning_rate": 2.816920943134536e-06, "loss": 0.0019, "step": 10369 }, { "epoch": 7.183927952892276, "grad_norm": 0.1946592628955841, "learning_rate": 2.81622746185853e-06, "loss": 0.0022, "step": 10370 }, { "epoch": 7.18462071354347, "grad_norm": 0.1239401251077652, "learning_rate": 2.8155339805825245e-06, "loss": 0.0016, "step": 10371 }, { "epoch": 7.185313474194666, "grad_norm": 0.13771218061447144, "learning_rate": 2.8148404993065185e-06, "loss": 0.0018, "step": 10372 }, { "epoch": 7.186006234845861, "grad_norm": 0.15504243969917297, "learning_rate": 2.8141470180305135e-06, "loss": 0.0021, "step": 10373 }, { "epoch": 7.186698995497056, "grad_norm": 0.09864521771669388, "learning_rate": 2.813453536754508e-06, "loss": 0.0015, "step": 10374 }, { "epoch": 7.1873917561482505, "grad_norm": 0.07116943597793579, "learning_rate": 2.812760055478502e-06, "loss": 0.0016, "step": 10375 }, { "epoch": 7.188084516799446, "grad_norm": 0.08147519081830978, "learning_rate": 2.8120665742024966e-06, "loss": 0.0019, "step": 10376 }, { "epoch": 7.188777277450641, "grad_norm": 0.17966683208942413, "learning_rate": 2.811373092926491e-06, "loss": 0.0025, "step": 10377 }, { "epoch": 7.1894700381018355, "grad_norm": 0.06896746158599854, "learning_rate": 2.810679611650486e-06, "loss": 0.0014, "step": 10378 }, { "epoch": 7.190162798753031, "grad_norm": 0.06997902691364288, "learning_rate": 2.80998613037448e-06, "loss": 0.0017, "step": 10379 }, { "epoch": 7.190855559404226, "grad_norm": 0.055900491774082184, "learning_rate": 2.8092926490984746e-06, "loss": 0.0014, "step": 10380 }, { "epoch": 7.19154832005542, "grad_norm": 0.1522582769393921, "learning_rate": 2.8085991678224687e-06, "loss": 0.002, "step": 10381 }, { "epoch": 7.192241080706616, "grad_norm": 0.2514250874519348, "learning_rate": 2.8079056865464632e-06, "loss": 0.0019, "step": 10382 }, { "epoch": 7.192933841357811, "grad_norm": 0.06587401777505875, "learning_rate": 2.807212205270458e-06, "loss": 0.0014, "step": 10383 }, { "epoch": 7.193626602009006, "grad_norm": 0.11855365335941315, "learning_rate": 2.8065187239944527e-06, "loss": 0.0017, "step": 10384 }, { "epoch": 7.194319362660201, "grad_norm": 0.1970524936914444, "learning_rate": 2.8058252427184468e-06, "loss": 0.0019, "step": 10385 }, { "epoch": 7.195012123311396, "grad_norm": 0.4181351959705353, "learning_rate": 2.8051317614424413e-06, "loss": 0.002, "step": 10386 }, { "epoch": 7.195704883962591, "grad_norm": 0.10302127152681351, "learning_rate": 2.8044382801664353e-06, "loss": 0.0017, "step": 10387 }, { "epoch": 7.196397644613786, "grad_norm": 0.13763552904129028, "learning_rate": 2.8037447988904303e-06, "loss": 0.0016, "step": 10388 }, { "epoch": 7.197090405264981, "grad_norm": 0.3948575258255005, "learning_rate": 2.803051317614425e-06, "loss": 0.0024, "step": 10389 }, { "epoch": 7.197783165916176, "grad_norm": 0.5052704811096191, "learning_rate": 2.802357836338419e-06, "loss": 0.0035, "step": 10390 }, { "epoch": 7.198475926567371, "grad_norm": 0.13870610296726227, "learning_rate": 2.8016643550624134e-06, "loss": 0.0023, "step": 10391 }, { "epoch": 7.199168687218566, "grad_norm": 0.08997498452663422, "learning_rate": 2.800970873786408e-06, "loss": 0.0014, "step": 10392 }, { "epoch": 7.199861447869761, "grad_norm": 0.10391823947429657, "learning_rate": 2.800277392510403e-06, "loss": 0.0017, "step": 10393 }, { "epoch": 7.200554208520956, "grad_norm": 0.060874272137880325, "learning_rate": 2.799583911234397e-06, "loss": 0.0015, "step": 10394 }, { "epoch": 7.201246969172151, "grad_norm": 0.2799158990383148, "learning_rate": 2.7988904299583914e-06, "loss": 0.0026, "step": 10395 }, { "epoch": 7.201939729823346, "grad_norm": 0.285510390996933, "learning_rate": 2.7981969486823855e-06, "loss": 0.0029, "step": 10396 }, { "epoch": 7.202632490474541, "grad_norm": 0.15282388031482697, "learning_rate": 2.79750346740638e-06, "loss": 0.0019, "step": 10397 }, { "epoch": 7.203325251125736, "grad_norm": 0.144027441740036, "learning_rate": 2.796809986130375e-06, "loss": 0.0018, "step": 10398 }, { "epoch": 7.204018011776931, "grad_norm": 0.0793803483247757, "learning_rate": 2.796116504854369e-06, "loss": 0.0016, "step": 10399 }, { "epoch": 7.204710772428126, "grad_norm": 0.10301689803600311, "learning_rate": 2.7954230235783636e-06, "loss": 0.0016, "step": 10400 }, { "epoch": 7.205403533079321, "grad_norm": 0.28356364369392395, "learning_rate": 2.794729542302358e-06, "loss": 0.0026, "step": 10401 }, { "epoch": 7.206096293730516, "grad_norm": 0.21687909960746765, "learning_rate": 2.794036061026352e-06, "loss": 0.0032, "step": 10402 }, { "epoch": 7.206789054381711, "grad_norm": 0.08599995076656342, "learning_rate": 2.793342579750347e-06, "loss": 0.0016, "step": 10403 }, { "epoch": 7.2074818150329065, "grad_norm": 0.17210939526557922, "learning_rate": 2.7926490984743416e-06, "loss": 0.0025, "step": 10404 }, { "epoch": 7.208174575684101, "grad_norm": 0.07880869507789612, "learning_rate": 2.7919556171983357e-06, "loss": 0.0016, "step": 10405 }, { "epoch": 7.208867336335296, "grad_norm": 0.12287131696939468, "learning_rate": 2.79126213592233e-06, "loss": 0.002, "step": 10406 }, { "epoch": 7.2095600969864915, "grad_norm": 0.26594921946525574, "learning_rate": 2.7905686546463247e-06, "loss": 0.0017, "step": 10407 }, { "epoch": 7.210252857637686, "grad_norm": 0.07592356950044632, "learning_rate": 2.7898751733703196e-06, "loss": 0.0017, "step": 10408 }, { "epoch": 7.210945618288881, "grad_norm": 0.11640237271785736, "learning_rate": 2.7891816920943137e-06, "loss": 0.0018, "step": 10409 }, { "epoch": 7.211638378940076, "grad_norm": 0.20506124198436737, "learning_rate": 2.7884882108183082e-06, "loss": 0.0024, "step": 10410 }, { "epoch": 7.212331139591271, "grad_norm": 0.15286748111248016, "learning_rate": 2.7877947295423023e-06, "loss": 0.0018, "step": 10411 }, { "epoch": 7.213023900242466, "grad_norm": 0.3817189335823059, "learning_rate": 2.787101248266297e-06, "loss": 0.0034, "step": 10412 }, { "epoch": 7.213716660893661, "grad_norm": 0.11340080946683884, "learning_rate": 2.7864077669902918e-06, "loss": 0.0019, "step": 10413 }, { "epoch": 7.214409421544857, "grad_norm": 0.11808422207832336, "learning_rate": 2.785714285714286e-06, "loss": 0.0016, "step": 10414 }, { "epoch": 7.215102182196051, "grad_norm": 0.099315345287323, "learning_rate": 2.7850208044382804e-06, "loss": 0.0016, "step": 10415 }, { "epoch": 7.215794942847246, "grad_norm": 0.12037818878889084, "learning_rate": 2.784327323162275e-06, "loss": 0.0019, "step": 10416 }, { "epoch": 7.216487703498442, "grad_norm": 0.0628642588853836, "learning_rate": 2.783633841886269e-06, "loss": 0.0014, "step": 10417 }, { "epoch": 7.217180464149636, "grad_norm": 0.12666834890842438, "learning_rate": 2.782940360610264e-06, "loss": 0.0022, "step": 10418 }, { "epoch": 7.217873224800831, "grad_norm": 0.09318286925554276, "learning_rate": 2.7822468793342584e-06, "loss": 0.0015, "step": 10419 }, { "epoch": 7.2185659854520265, "grad_norm": 0.1301775872707367, "learning_rate": 2.7815533980582525e-06, "loss": 0.0028, "step": 10420 }, { "epoch": 7.219258746103221, "grad_norm": 0.11705788224935532, "learning_rate": 2.780859916782247e-06, "loss": 0.0014, "step": 10421 }, { "epoch": 7.219951506754416, "grad_norm": 0.44201797246932983, "learning_rate": 2.7801664355062415e-06, "loss": 0.0016, "step": 10422 }, { "epoch": 7.2206442674056115, "grad_norm": 0.054916124790906906, "learning_rate": 2.779472954230236e-06, "loss": 0.0014, "step": 10423 }, { "epoch": 7.221337028056807, "grad_norm": 0.060670580714941025, "learning_rate": 2.7787794729542305e-06, "loss": 0.0012, "step": 10424 }, { "epoch": 7.222029788708001, "grad_norm": 0.14784590899944305, "learning_rate": 2.778085991678225e-06, "loss": 0.0017, "step": 10425 }, { "epoch": 7.222722549359196, "grad_norm": 0.3116993010044098, "learning_rate": 2.777392510402219e-06, "loss": 0.0026, "step": 10426 }, { "epoch": 7.223415310010392, "grad_norm": 0.10167987644672394, "learning_rate": 2.7766990291262136e-06, "loss": 0.0016, "step": 10427 }, { "epoch": 7.224108070661586, "grad_norm": 0.30445626378059387, "learning_rate": 2.7760055478502086e-06, "loss": 0.0021, "step": 10428 }, { "epoch": 7.224800831312781, "grad_norm": 0.28299471735954285, "learning_rate": 2.7753120665742026e-06, "loss": 0.0025, "step": 10429 }, { "epoch": 7.225493591963977, "grad_norm": 0.1520148664712906, "learning_rate": 2.774618585298197e-06, "loss": 0.002, "step": 10430 }, { "epoch": 7.226186352615171, "grad_norm": 0.21248866617679596, "learning_rate": 2.7739251040221917e-06, "loss": 0.0015, "step": 10431 }, { "epoch": 7.226879113266366, "grad_norm": 0.14709307253360748, "learning_rate": 2.7732316227461858e-06, "loss": 0.0022, "step": 10432 }, { "epoch": 7.227571873917562, "grad_norm": 0.07308857142925262, "learning_rate": 2.7725381414701807e-06, "loss": 0.0013, "step": 10433 }, { "epoch": 7.228264634568757, "grad_norm": 0.14815136790275574, "learning_rate": 2.771844660194175e-06, "loss": 0.0019, "step": 10434 }, { "epoch": 7.228957395219951, "grad_norm": 0.09827826917171478, "learning_rate": 2.7711511789181693e-06, "loss": 0.0016, "step": 10435 }, { "epoch": 7.229650155871147, "grad_norm": 0.12288874387741089, "learning_rate": 2.770457697642164e-06, "loss": 0.0018, "step": 10436 }, { "epoch": 7.230342916522342, "grad_norm": 0.14140482246875763, "learning_rate": 2.7697642163661583e-06, "loss": 0.0019, "step": 10437 }, { "epoch": 7.231035677173536, "grad_norm": 0.06752082705497742, "learning_rate": 2.769070735090153e-06, "loss": 0.0013, "step": 10438 }, { "epoch": 7.2317284378247315, "grad_norm": 0.14494122564792633, "learning_rate": 2.7683772538141473e-06, "loss": 0.0017, "step": 10439 }, { "epoch": 7.232421198475927, "grad_norm": 0.13115465641021729, "learning_rate": 2.767683772538142e-06, "loss": 0.0016, "step": 10440 }, { "epoch": 7.233113959127121, "grad_norm": 0.17863360047340393, "learning_rate": 2.766990291262136e-06, "loss": 0.002, "step": 10441 }, { "epoch": 7.2338067197783165, "grad_norm": 0.09369208663702011, "learning_rate": 2.7662968099861304e-06, "loss": 0.0018, "step": 10442 }, { "epoch": 7.234499480429512, "grad_norm": 0.10856392979621887, "learning_rate": 2.7656033287101254e-06, "loss": 0.0015, "step": 10443 }, { "epoch": 7.235192241080707, "grad_norm": 0.10256483405828476, "learning_rate": 2.7649098474341194e-06, "loss": 0.0016, "step": 10444 }, { "epoch": 7.235885001731901, "grad_norm": 0.08446470648050308, "learning_rate": 2.764216366158114e-06, "loss": 0.0015, "step": 10445 }, { "epoch": 7.236577762383097, "grad_norm": 0.06964675337076187, "learning_rate": 2.7635228848821085e-06, "loss": 0.0014, "step": 10446 }, { "epoch": 7.237270523034292, "grad_norm": 0.09242125600576401, "learning_rate": 2.7628294036061026e-06, "loss": 0.0019, "step": 10447 }, { "epoch": 7.237963283685486, "grad_norm": 0.18718264997005463, "learning_rate": 2.7621359223300975e-06, "loss": 0.0025, "step": 10448 }, { "epoch": 7.238656044336682, "grad_norm": 0.09490001946687698, "learning_rate": 2.761442441054092e-06, "loss": 0.0016, "step": 10449 }, { "epoch": 7.239348804987877, "grad_norm": 0.12201209366321564, "learning_rate": 2.760748959778086e-06, "loss": 0.0015, "step": 10450 }, { "epoch": 7.240041565639071, "grad_norm": 0.15196803212165833, "learning_rate": 2.7600554785020806e-06, "loss": 0.0019, "step": 10451 }, { "epoch": 7.240734326290267, "grad_norm": 0.07299428433179855, "learning_rate": 2.759361997226075e-06, "loss": 0.0015, "step": 10452 }, { "epoch": 7.241427086941462, "grad_norm": 0.10037188977003098, "learning_rate": 2.7586685159500696e-06, "loss": 0.0016, "step": 10453 }, { "epoch": 7.242119847592657, "grad_norm": 0.11262709647417068, "learning_rate": 2.757975034674064e-06, "loss": 0.0018, "step": 10454 }, { "epoch": 7.2428126082438515, "grad_norm": 0.1267578899860382, "learning_rate": 2.7572815533980586e-06, "loss": 0.0017, "step": 10455 }, { "epoch": 7.243505368895047, "grad_norm": 0.1580943912267685, "learning_rate": 2.7565880721220527e-06, "loss": 0.002, "step": 10456 }, { "epoch": 7.244198129546242, "grad_norm": 0.13367560505867004, "learning_rate": 2.7558945908460472e-06, "loss": 0.003, "step": 10457 }, { "epoch": 7.2448908901974365, "grad_norm": 0.06954680383205414, "learning_rate": 2.755201109570042e-06, "loss": 0.0014, "step": 10458 }, { "epoch": 7.245583650848632, "grad_norm": 0.0899452492594719, "learning_rate": 2.7545076282940362e-06, "loss": 0.0017, "step": 10459 }, { "epoch": 7.246276411499827, "grad_norm": 0.3852347731590271, "learning_rate": 2.7538141470180308e-06, "loss": 0.0024, "step": 10460 }, { "epoch": 7.246969172151021, "grad_norm": 0.31897953152656555, "learning_rate": 2.7531206657420253e-06, "loss": 0.0022, "step": 10461 }, { "epoch": 7.247661932802217, "grad_norm": 0.16231410205364227, "learning_rate": 2.7524271844660194e-06, "loss": 0.0017, "step": 10462 }, { "epoch": 7.248354693453412, "grad_norm": 0.1474291831254959, "learning_rate": 2.7517337031900143e-06, "loss": 0.0019, "step": 10463 }, { "epoch": 7.249047454104607, "grad_norm": 0.13489697873592377, "learning_rate": 2.751040221914009e-06, "loss": 0.002, "step": 10464 }, { "epoch": 7.249740214755802, "grad_norm": 0.15055708587169647, "learning_rate": 2.750346740638003e-06, "loss": 0.0017, "step": 10465 }, { "epoch": 7.250432975406997, "grad_norm": 0.09993751347064972, "learning_rate": 2.7496532593619974e-06, "loss": 0.0016, "step": 10466 }, { "epoch": 7.251125736058192, "grad_norm": 0.09920458495616913, "learning_rate": 2.7489597780859915e-06, "loss": 0.0017, "step": 10467 }, { "epoch": 7.251818496709387, "grad_norm": 0.08457056432962418, "learning_rate": 2.7482662968099864e-06, "loss": 0.0016, "step": 10468 }, { "epoch": 7.252511257360582, "grad_norm": 0.15224045515060425, "learning_rate": 2.747572815533981e-06, "loss": 0.002, "step": 10469 }, { "epoch": 7.253204018011777, "grad_norm": 0.12078787386417389, "learning_rate": 2.7468793342579754e-06, "loss": 0.0015, "step": 10470 }, { "epoch": 7.253896778662972, "grad_norm": 0.1865270733833313, "learning_rate": 2.7461858529819695e-06, "loss": 0.0031, "step": 10471 }, { "epoch": 7.254589539314167, "grad_norm": 0.1568903774023056, "learning_rate": 2.745492371705964e-06, "loss": 0.0035, "step": 10472 }, { "epoch": 7.255282299965362, "grad_norm": 0.14186471700668335, "learning_rate": 2.744798890429959e-06, "loss": 0.0021, "step": 10473 }, { "epoch": 7.255975060616557, "grad_norm": 0.09129304438829422, "learning_rate": 2.744105409153953e-06, "loss": 0.0016, "step": 10474 }, { "epoch": 7.256667821267752, "grad_norm": 0.48369312286376953, "learning_rate": 2.7434119278779476e-06, "loss": 0.0029, "step": 10475 }, { "epoch": 7.257360581918947, "grad_norm": 0.07339184731245041, "learning_rate": 2.742718446601942e-06, "loss": 0.0015, "step": 10476 }, { "epoch": 7.258053342570142, "grad_norm": 0.07599583268165588, "learning_rate": 2.742024965325936e-06, "loss": 0.0014, "step": 10477 }, { "epoch": 7.258746103221337, "grad_norm": 0.1359255611896515, "learning_rate": 2.741331484049931e-06, "loss": 0.0019, "step": 10478 }, { "epoch": 7.259438863872532, "grad_norm": 0.09700708836317062, "learning_rate": 2.7406380027739256e-06, "loss": 0.0017, "step": 10479 }, { "epoch": 7.260131624523727, "grad_norm": 0.2100406438112259, "learning_rate": 2.7399445214979197e-06, "loss": 0.0031, "step": 10480 }, { "epoch": 7.260824385174922, "grad_norm": 0.12372469156980515, "learning_rate": 2.739251040221914e-06, "loss": 0.0016, "step": 10481 }, { "epoch": 7.261517145826117, "grad_norm": 0.0920005515217781, "learning_rate": 2.7385575589459083e-06, "loss": 0.0016, "step": 10482 }, { "epoch": 7.262209906477312, "grad_norm": 0.11117003113031387, "learning_rate": 2.7378640776699032e-06, "loss": 0.0018, "step": 10483 }, { "epoch": 7.2629026671285075, "grad_norm": 0.0973900705575943, "learning_rate": 2.7371705963938977e-06, "loss": 0.0018, "step": 10484 }, { "epoch": 7.263595427779702, "grad_norm": 0.1261003464460373, "learning_rate": 2.7364771151178922e-06, "loss": 0.0017, "step": 10485 }, { "epoch": 7.264288188430897, "grad_norm": 0.06539832800626755, "learning_rate": 2.7357836338418863e-06, "loss": 0.0015, "step": 10486 }, { "epoch": 7.2649809490820925, "grad_norm": 0.09684585779905319, "learning_rate": 2.735090152565881e-06, "loss": 0.0015, "step": 10487 }, { "epoch": 7.265673709733287, "grad_norm": 0.09772973507642746, "learning_rate": 2.7343966712898758e-06, "loss": 0.0016, "step": 10488 }, { "epoch": 7.266366470384482, "grad_norm": 0.12444275617599487, "learning_rate": 2.73370319001387e-06, "loss": 0.0016, "step": 10489 }, { "epoch": 7.267059231035677, "grad_norm": 0.09388227760791779, "learning_rate": 2.7330097087378644e-06, "loss": 0.0016, "step": 10490 }, { "epoch": 7.267751991686872, "grad_norm": 0.10781733691692352, "learning_rate": 2.7323162274618584e-06, "loss": 0.002, "step": 10491 }, { "epoch": 7.268444752338067, "grad_norm": 0.26906630396842957, "learning_rate": 2.731622746185853e-06, "loss": 0.0023, "step": 10492 }, { "epoch": 7.269137512989262, "grad_norm": 0.10488678514957428, "learning_rate": 2.730929264909848e-06, "loss": 0.0016, "step": 10493 }, { "epoch": 7.269830273640457, "grad_norm": 0.18185140192508698, "learning_rate": 2.7302357836338424e-06, "loss": 0.0029, "step": 10494 }, { "epoch": 7.270523034291652, "grad_norm": 0.09366568177938461, "learning_rate": 2.7295423023578365e-06, "loss": 0.0014, "step": 10495 }, { "epoch": 7.271215794942847, "grad_norm": 0.7352327704429626, "learning_rate": 2.728848821081831e-06, "loss": 0.0024, "step": 10496 }, { "epoch": 7.271908555594043, "grad_norm": 0.20652872323989868, "learning_rate": 2.728155339805825e-06, "loss": 0.0019, "step": 10497 }, { "epoch": 7.272601316245237, "grad_norm": 0.0690193846821785, "learning_rate": 2.72746185852982e-06, "loss": 0.0013, "step": 10498 }, { "epoch": 7.273294076896432, "grad_norm": 0.1919272243976593, "learning_rate": 2.7267683772538145e-06, "loss": 0.0018, "step": 10499 }, { "epoch": 7.2739868375476275, "grad_norm": 0.09269005060195923, "learning_rate": 2.726074895977809e-06, "loss": 0.0014, "step": 10500 }, { "epoch": 7.274679598198822, "grad_norm": 0.15102043747901917, "learning_rate": 2.725381414701803e-06, "loss": 0.002, "step": 10501 }, { "epoch": 7.275372358850017, "grad_norm": 0.1863505244255066, "learning_rate": 2.7246879334257976e-06, "loss": 0.0032, "step": 10502 }, { "epoch": 7.2760651195012125, "grad_norm": 0.2828274965286255, "learning_rate": 2.7239944521497926e-06, "loss": 0.0019, "step": 10503 }, { "epoch": 7.276757880152408, "grad_norm": 0.1548154056072235, "learning_rate": 2.7233009708737867e-06, "loss": 0.0018, "step": 10504 }, { "epoch": 7.277450640803602, "grad_norm": 0.11284724622964859, "learning_rate": 2.722607489597781e-06, "loss": 0.0016, "step": 10505 }, { "epoch": 7.278143401454797, "grad_norm": 0.09580027312040329, "learning_rate": 2.7219140083217752e-06, "loss": 0.0018, "step": 10506 }, { "epoch": 7.278836162105993, "grad_norm": 0.2197761833667755, "learning_rate": 2.7212205270457698e-06, "loss": 0.0021, "step": 10507 }, { "epoch": 7.279528922757187, "grad_norm": 0.08582144975662231, "learning_rate": 2.7205270457697647e-06, "loss": 0.0016, "step": 10508 }, { "epoch": 7.280221683408382, "grad_norm": 0.33913543820381165, "learning_rate": 2.719833564493759e-06, "loss": 0.0018, "step": 10509 }, { "epoch": 7.280914444059578, "grad_norm": 0.6199621558189392, "learning_rate": 2.7191400832177533e-06, "loss": 0.0022, "step": 10510 }, { "epoch": 7.281607204710772, "grad_norm": 0.05950072035193443, "learning_rate": 2.718446601941748e-06, "loss": 0.0014, "step": 10511 }, { "epoch": 7.282299965361967, "grad_norm": 0.09101162105798721, "learning_rate": 2.717753120665742e-06, "loss": 0.0019, "step": 10512 }, { "epoch": 7.282992726013163, "grad_norm": 0.0685565322637558, "learning_rate": 2.717059639389737e-06, "loss": 0.0016, "step": 10513 }, { "epoch": 7.283685486664357, "grad_norm": 0.3845399022102356, "learning_rate": 2.7163661581137313e-06, "loss": 0.0029, "step": 10514 }, { "epoch": 7.284378247315552, "grad_norm": 0.1877196878194809, "learning_rate": 2.7156726768377254e-06, "loss": 0.0026, "step": 10515 }, { "epoch": 7.285071007966748, "grad_norm": 0.29697221517562866, "learning_rate": 2.71497919556172e-06, "loss": 0.0033, "step": 10516 }, { "epoch": 7.285763768617943, "grad_norm": 0.2284240871667862, "learning_rate": 2.7142857142857144e-06, "loss": 0.0022, "step": 10517 }, { "epoch": 7.286456529269137, "grad_norm": 0.2014724463224411, "learning_rate": 2.7135922330097094e-06, "loss": 0.0024, "step": 10518 }, { "epoch": 7.2871492899203325, "grad_norm": 0.12144462764263153, "learning_rate": 2.7128987517337035e-06, "loss": 0.002, "step": 10519 }, { "epoch": 7.287842050571528, "grad_norm": 0.15588727593421936, "learning_rate": 2.712205270457698e-06, "loss": 0.0021, "step": 10520 }, { "epoch": 7.288534811222722, "grad_norm": 0.07342936098575592, "learning_rate": 2.711511789181692e-06, "loss": 0.0016, "step": 10521 }, { "epoch": 7.2892275718739175, "grad_norm": 0.09196361899375916, "learning_rate": 2.7108183079056866e-06, "loss": 0.0017, "step": 10522 }, { "epoch": 7.289920332525113, "grad_norm": 0.4919677674770355, "learning_rate": 2.7101248266296815e-06, "loss": 0.0026, "step": 10523 }, { "epoch": 7.290613093176308, "grad_norm": 0.13308435678482056, "learning_rate": 2.709431345353676e-06, "loss": 0.0017, "step": 10524 }, { "epoch": 7.291305853827502, "grad_norm": 0.09319281578063965, "learning_rate": 2.70873786407767e-06, "loss": 0.0016, "step": 10525 }, { "epoch": 7.291998614478698, "grad_norm": 0.07991555333137512, "learning_rate": 2.7080443828016646e-06, "loss": 0.0015, "step": 10526 }, { "epoch": 7.292691375129893, "grad_norm": 0.6627943515777588, "learning_rate": 2.7073509015256587e-06, "loss": 0.002, "step": 10527 }, { "epoch": 7.293384135781087, "grad_norm": 0.1489666849374771, "learning_rate": 2.7066574202496536e-06, "loss": 0.0028, "step": 10528 }, { "epoch": 7.294076896432283, "grad_norm": 0.1293557733297348, "learning_rate": 2.705963938973648e-06, "loss": 0.0019, "step": 10529 }, { "epoch": 7.294769657083478, "grad_norm": 0.10962218791246414, "learning_rate": 2.7052704576976422e-06, "loss": 0.0017, "step": 10530 }, { "epoch": 7.295462417734672, "grad_norm": 0.11898452043533325, "learning_rate": 2.7045769764216367e-06, "loss": 0.0017, "step": 10531 }, { "epoch": 7.296155178385868, "grad_norm": 0.3060879111289978, "learning_rate": 2.7038834951456312e-06, "loss": 0.0035, "step": 10532 }, { "epoch": 7.296847939037063, "grad_norm": 0.17742903530597687, "learning_rate": 2.703190013869626e-06, "loss": 0.0017, "step": 10533 }, { "epoch": 7.297540699688257, "grad_norm": 0.1303330510854721, "learning_rate": 2.7024965325936203e-06, "loss": 0.0023, "step": 10534 }, { "epoch": 7.2982334603394525, "grad_norm": 0.24945330619812012, "learning_rate": 2.7018030513176148e-06, "loss": 0.0025, "step": 10535 }, { "epoch": 7.298926220990648, "grad_norm": 0.13665013015270233, "learning_rate": 2.701109570041609e-06, "loss": 0.0024, "step": 10536 }, { "epoch": 7.299618981641843, "grad_norm": 0.11649353802204132, "learning_rate": 2.7004160887656034e-06, "loss": 0.0017, "step": 10537 }, { "epoch": 7.3003117422930375, "grad_norm": 0.10994106531143188, "learning_rate": 2.6997226074895983e-06, "loss": 0.0016, "step": 10538 }, { "epoch": 7.301004502944233, "grad_norm": 0.11216150224208832, "learning_rate": 2.6990291262135924e-06, "loss": 0.0016, "step": 10539 }, { "epoch": 7.301697263595428, "grad_norm": 0.1115545853972435, "learning_rate": 2.698335644937587e-06, "loss": 0.0018, "step": 10540 }, { "epoch": 7.302390024246622, "grad_norm": 0.09486105293035507, "learning_rate": 2.6976421636615814e-06, "loss": 0.0017, "step": 10541 }, { "epoch": 7.303082784897818, "grad_norm": 0.10198003798723221, "learning_rate": 2.6969486823855755e-06, "loss": 0.0016, "step": 10542 }, { "epoch": 7.303775545549013, "grad_norm": 0.08961523324251175, "learning_rate": 2.6962552011095704e-06, "loss": 0.0017, "step": 10543 }, { "epoch": 7.304468306200208, "grad_norm": 0.2805187702178955, "learning_rate": 2.695561719833565e-06, "loss": 0.0019, "step": 10544 }, { "epoch": 7.305161066851403, "grad_norm": 0.1289357841014862, "learning_rate": 2.694868238557559e-06, "loss": 0.0021, "step": 10545 }, { "epoch": 7.305853827502598, "grad_norm": 0.32471606135368347, "learning_rate": 2.6941747572815535e-06, "loss": 0.0024, "step": 10546 }, { "epoch": 7.306546588153793, "grad_norm": 0.09396038949489594, "learning_rate": 2.693481276005548e-06, "loss": 0.0015, "step": 10547 }, { "epoch": 7.307239348804988, "grad_norm": 0.1599978804588318, "learning_rate": 2.692787794729543e-06, "loss": 0.0022, "step": 10548 }, { "epoch": 7.307932109456183, "grad_norm": 0.1154475212097168, "learning_rate": 2.692094313453537e-06, "loss": 0.0019, "step": 10549 }, { "epoch": 7.308624870107378, "grad_norm": 0.14504484832286835, "learning_rate": 2.6914008321775316e-06, "loss": 0.0017, "step": 10550 }, { "epoch": 7.3093176307585725, "grad_norm": 0.10207211971282959, "learning_rate": 2.6907073509015257e-06, "loss": 0.0016, "step": 10551 }, { "epoch": 7.310010391409768, "grad_norm": 0.08849812299013138, "learning_rate": 2.69001386962552e-06, "loss": 0.0016, "step": 10552 }, { "epoch": 7.310703152060963, "grad_norm": 0.14090637862682343, "learning_rate": 2.689320388349515e-06, "loss": 0.0016, "step": 10553 }, { "epoch": 7.3113959127121575, "grad_norm": 0.14641524851322174, "learning_rate": 2.688626907073509e-06, "loss": 0.0016, "step": 10554 }, { "epoch": 7.312088673363353, "grad_norm": 0.18492135405540466, "learning_rate": 2.6879334257975037e-06, "loss": 0.0033, "step": 10555 }, { "epoch": 7.312781434014548, "grad_norm": 0.07164610177278519, "learning_rate": 2.687239944521498e-06, "loss": 0.0014, "step": 10556 }, { "epoch": 7.313474194665743, "grad_norm": 0.09530164301395416, "learning_rate": 2.6865464632454923e-06, "loss": 0.0016, "step": 10557 }, { "epoch": 7.314166955316938, "grad_norm": 0.09830351918935776, "learning_rate": 2.6858529819694872e-06, "loss": 0.0014, "step": 10558 }, { "epoch": 7.314859715968133, "grad_norm": 0.13945689797401428, "learning_rate": 2.6851595006934817e-06, "loss": 0.0025, "step": 10559 }, { "epoch": 7.315552476619328, "grad_norm": 0.157485231757164, "learning_rate": 2.684466019417476e-06, "loss": 0.0022, "step": 10560 }, { "epoch": 7.316245237270523, "grad_norm": 0.16562585532665253, "learning_rate": 2.6837725381414703e-06, "loss": 0.0018, "step": 10561 }, { "epoch": 7.316937997921718, "grad_norm": 0.20526468753814697, "learning_rate": 2.683079056865465e-06, "loss": 0.0013, "step": 10562 }, { "epoch": 7.317630758572913, "grad_norm": 0.0601305291056633, "learning_rate": 2.6823855755894593e-06, "loss": 0.0013, "step": 10563 }, { "epoch": 7.3183235192241085, "grad_norm": 0.12286640703678131, "learning_rate": 2.681692094313454e-06, "loss": 0.0017, "step": 10564 }, { "epoch": 7.319016279875303, "grad_norm": 0.08743162453174591, "learning_rate": 2.6809986130374484e-06, "loss": 0.0016, "step": 10565 }, { "epoch": 7.319709040526498, "grad_norm": 0.2952720820903778, "learning_rate": 2.6803051317614425e-06, "loss": 0.0022, "step": 10566 }, { "epoch": 7.3204018011776935, "grad_norm": 0.1119878888130188, "learning_rate": 2.679611650485437e-06, "loss": 0.0023, "step": 10567 }, { "epoch": 7.321094561828888, "grad_norm": 0.10538679361343384, "learning_rate": 2.678918169209432e-06, "loss": 0.0014, "step": 10568 }, { "epoch": 7.321787322480083, "grad_norm": 0.4837396442890167, "learning_rate": 2.678224687933426e-06, "loss": 0.0018, "step": 10569 }, { "epoch": 7.322480083131278, "grad_norm": 0.10609745234251022, "learning_rate": 2.6775312066574205e-06, "loss": 0.0015, "step": 10570 }, { "epoch": 7.323172843782473, "grad_norm": 0.08468212187290192, "learning_rate": 2.676837725381415e-06, "loss": 0.0015, "step": 10571 }, { "epoch": 7.323865604433668, "grad_norm": 0.17318294942378998, "learning_rate": 2.676144244105409e-06, "loss": 0.0027, "step": 10572 }, { "epoch": 7.324558365084863, "grad_norm": 0.15581314265727997, "learning_rate": 2.675450762829404e-06, "loss": 0.0019, "step": 10573 }, { "epoch": 7.325251125736058, "grad_norm": 0.17034558951854706, "learning_rate": 2.6747572815533985e-06, "loss": 0.0019, "step": 10574 }, { "epoch": 7.325943886387253, "grad_norm": 0.17120777070522308, "learning_rate": 2.6740638002773926e-06, "loss": 0.0022, "step": 10575 }, { "epoch": 7.326636647038448, "grad_norm": 0.17404325306415558, "learning_rate": 2.673370319001387e-06, "loss": 0.0016, "step": 10576 }, { "epoch": 7.327329407689644, "grad_norm": 0.20107604563236237, "learning_rate": 2.6726768377253816e-06, "loss": 0.0022, "step": 10577 }, { "epoch": 7.328022168340838, "grad_norm": 0.11374718695878983, "learning_rate": 2.671983356449376e-06, "loss": 0.0014, "step": 10578 }, { "epoch": 7.328714928992033, "grad_norm": 0.06300130486488342, "learning_rate": 2.6712898751733707e-06, "loss": 0.0014, "step": 10579 }, { "epoch": 7.3294076896432285, "grad_norm": 0.13866983354091644, "learning_rate": 2.670596393897365e-06, "loss": 0.0022, "step": 10580 }, { "epoch": 7.330100450294423, "grad_norm": 0.09184422343969345, "learning_rate": 2.6699029126213593e-06, "loss": 0.0016, "step": 10581 }, { "epoch": 7.330793210945618, "grad_norm": 0.2565865218639374, "learning_rate": 2.6692094313453538e-06, "loss": 0.002, "step": 10582 }, { "epoch": 7.3314859715968135, "grad_norm": 0.12624496221542358, "learning_rate": 2.6685159500693487e-06, "loss": 0.0018, "step": 10583 }, { "epoch": 7.332178732248009, "grad_norm": 0.3806810677051544, "learning_rate": 2.6678224687933428e-06, "loss": 0.0024, "step": 10584 }, { "epoch": 7.332871492899203, "grad_norm": 0.5169533491134644, "learning_rate": 2.6671289875173373e-06, "loss": 0.0029, "step": 10585 }, { "epoch": 7.333564253550398, "grad_norm": 0.1569748967885971, "learning_rate": 2.666435506241332e-06, "loss": 0.0017, "step": 10586 }, { "epoch": 7.334257014201594, "grad_norm": 0.10726001113653183, "learning_rate": 2.665742024965326e-06, "loss": 0.0013, "step": 10587 }, { "epoch": 7.334949774852788, "grad_norm": 0.09763527661561966, "learning_rate": 2.665048543689321e-06, "loss": 0.0016, "step": 10588 }, { "epoch": 7.335642535503983, "grad_norm": 0.08869840949773788, "learning_rate": 2.6643550624133153e-06, "loss": 0.0018, "step": 10589 }, { "epoch": 7.336335296155179, "grad_norm": 0.15589062869548798, "learning_rate": 2.6636615811373094e-06, "loss": 0.0025, "step": 10590 }, { "epoch": 7.337028056806373, "grad_norm": 0.06364841014146805, "learning_rate": 2.662968099861304e-06, "loss": 0.0013, "step": 10591 }, { "epoch": 7.337720817457568, "grad_norm": 0.1893458515405655, "learning_rate": 2.6622746185852984e-06, "loss": 0.002, "step": 10592 }, { "epoch": 7.338413578108764, "grad_norm": 0.11292804777622223, "learning_rate": 2.661581137309293e-06, "loss": 0.0015, "step": 10593 }, { "epoch": 7.339106338759958, "grad_norm": 0.10484236478805542, "learning_rate": 2.6608876560332875e-06, "loss": 0.002, "step": 10594 }, { "epoch": 7.339799099411153, "grad_norm": 0.06305152177810669, "learning_rate": 2.660194174757282e-06, "loss": 0.0013, "step": 10595 }, { "epoch": 7.3404918600623486, "grad_norm": 0.10523684322834015, "learning_rate": 2.659500693481276e-06, "loss": 0.0017, "step": 10596 }, { "epoch": 7.341184620713544, "grad_norm": 0.10294146835803986, "learning_rate": 2.6588072122052706e-06, "loss": 0.0016, "step": 10597 }, { "epoch": 7.341877381364738, "grad_norm": 0.11167468130588531, "learning_rate": 2.6581137309292655e-06, "loss": 0.0014, "step": 10598 }, { "epoch": 7.3425701420159335, "grad_norm": 0.21104352176189423, "learning_rate": 2.6574202496532596e-06, "loss": 0.0021, "step": 10599 }, { "epoch": 7.343262902667129, "grad_norm": 0.09167628735303879, "learning_rate": 2.656726768377254e-06, "loss": 0.0014, "step": 10600 }, { "epoch": 7.343955663318323, "grad_norm": 0.1054004430770874, "learning_rate": 2.6560332871012486e-06, "loss": 0.0014, "step": 10601 }, { "epoch": 7.3446484239695184, "grad_norm": 0.16618290543556213, "learning_rate": 2.6553398058252427e-06, "loss": 0.0025, "step": 10602 }, { "epoch": 7.345341184620714, "grad_norm": 0.14775697886943817, "learning_rate": 2.6546463245492376e-06, "loss": 0.0014, "step": 10603 }, { "epoch": 7.346033945271909, "grad_norm": 0.11632692068815231, "learning_rate": 2.653952843273232e-06, "loss": 0.0019, "step": 10604 }, { "epoch": 7.346726705923103, "grad_norm": 0.08726738393306732, "learning_rate": 2.6532593619972262e-06, "loss": 0.0015, "step": 10605 }, { "epoch": 7.347419466574299, "grad_norm": 0.1897185891866684, "learning_rate": 2.6525658807212207e-06, "loss": 0.0019, "step": 10606 }, { "epoch": 7.348112227225494, "grad_norm": 0.13701261579990387, "learning_rate": 2.651872399445215e-06, "loss": 0.0024, "step": 10607 }, { "epoch": 7.348804987876688, "grad_norm": 0.09947992116212845, "learning_rate": 2.6511789181692098e-06, "loss": 0.0018, "step": 10608 }, { "epoch": 7.349497748527884, "grad_norm": 0.09648267179727554, "learning_rate": 2.6504854368932043e-06, "loss": 0.0017, "step": 10609 }, { "epoch": 7.350190509179079, "grad_norm": 0.33638474345207214, "learning_rate": 2.6497919556171988e-06, "loss": 0.0022, "step": 10610 }, { "epoch": 7.350883269830273, "grad_norm": 0.12255071103572845, "learning_rate": 2.649098474341193e-06, "loss": 0.0027, "step": 10611 }, { "epoch": 7.351576030481469, "grad_norm": 0.16847288608551025, "learning_rate": 2.6484049930651874e-06, "loss": 0.002, "step": 10612 }, { "epoch": 7.352268791132664, "grad_norm": 0.14912553131580353, "learning_rate": 2.6477115117891823e-06, "loss": 0.0025, "step": 10613 }, { "epoch": 7.352961551783858, "grad_norm": 0.09489867091178894, "learning_rate": 2.6470180305131764e-06, "loss": 0.0016, "step": 10614 }, { "epoch": 7.3536543124350535, "grad_norm": 0.21695023775100708, "learning_rate": 2.646324549237171e-06, "loss": 0.0022, "step": 10615 }, { "epoch": 7.354347073086249, "grad_norm": 0.13073423504829407, "learning_rate": 2.6456310679611654e-06, "loss": 0.0019, "step": 10616 }, { "epoch": 7.355039833737444, "grad_norm": 0.12255122512578964, "learning_rate": 2.6449375866851595e-06, "loss": 0.0015, "step": 10617 }, { "epoch": 7.3557325943886385, "grad_norm": 0.11707830429077148, "learning_rate": 2.6442441054091544e-06, "loss": 0.0016, "step": 10618 }, { "epoch": 7.356425355039834, "grad_norm": 0.08144792169332504, "learning_rate": 2.643550624133149e-06, "loss": 0.0016, "step": 10619 }, { "epoch": 7.357118115691029, "grad_norm": 0.09570600837469101, "learning_rate": 2.642857142857143e-06, "loss": 0.0017, "step": 10620 }, { "epoch": 7.357810876342223, "grad_norm": 0.08857254683971405, "learning_rate": 2.6421636615811375e-06, "loss": 0.0016, "step": 10621 }, { "epoch": 7.358503636993419, "grad_norm": 0.07691531628370285, "learning_rate": 2.6414701803051316e-06, "loss": 0.0015, "step": 10622 }, { "epoch": 7.359196397644614, "grad_norm": 0.07823880016803741, "learning_rate": 2.6407766990291266e-06, "loss": 0.0014, "step": 10623 }, { "epoch": 7.359889158295809, "grad_norm": 0.08908458799123764, "learning_rate": 2.640083217753121e-06, "loss": 0.0015, "step": 10624 }, { "epoch": 7.360581918947004, "grad_norm": 0.176316499710083, "learning_rate": 2.6393897364771156e-06, "loss": 0.0032, "step": 10625 }, { "epoch": 7.361274679598199, "grad_norm": 0.15909786522388458, "learning_rate": 2.6386962552011097e-06, "loss": 0.0017, "step": 10626 }, { "epoch": 7.361967440249394, "grad_norm": 0.21318186819553375, "learning_rate": 2.638002773925104e-06, "loss": 0.0022, "step": 10627 }, { "epoch": 7.362660200900589, "grad_norm": 0.09125927835702896, "learning_rate": 2.637309292649099e-06, "loss": 0.0012, "step": 10628 }, { "epoch": 7.363352961551784, "grad_norm": 0.2801209092140198, "learning_rate": 2.636615811373093e-06, "loss": 0.0023, "step": 10629 }, { "epoch": 7.364045722202979, "grad_norm": 0.10344817489385605, "learning_rate": 2.6359223300970877e-06, "loss": 0.0015, "step": 10630 }, { "epoch": 7.3647384828541735, "grad_norm": 0.12958082556724548, "learning_rate": 2.6352288488210818e-06, "loss": 0.0024, "step": 10631 }, { "epoch": 7.365431243505369, "grad_norm": 0.06534534692764282, "learning_rate": 2.6345353675450763e-06, "loss": 0.0015, "step": 10632 }, { "epoch": 7.366124004156564, "grad_norm": 0.32471805810928345, "learning_rate": 2.6338418862690712e-06, "loss": 0.0039, "step": 10633 }, { "epoch": 7.3668167648077585, "grad_norm": 0.14212051033973694, "learning_rate": 2.6331484049930657e-06, "loss": 0.0016, "step": 10634 }, { "epoch": 7.367509525458954, "grad_norm": 0.10433334857225418, "learning_rate": 2.63245492371706e-06, "loss": 0.0014, "step": 10635 }, { "epoch": 7.368202286110149, "grad_norm": 0.10529961436986923, "learning_rate": 2.6317614424410543e-06, "loss": 0.0017, "step": 10636 }, { "epoch": 7.368895046761344, "grad_norm": 0.0966438427567482, "learning_rate": 2.6310679611650484e-06, "loss": 0.0019, "step": 10637 }, { "epoch": 7.369587807412539, "grad_norm": 0.05615049600601196, "learning_rate": 2.6303744798890434e-06, "loss": 0.0014, "step": 10638 }, { "epoch": 7.370280568063734, "grad_norm": 0.11485607177019119, "learning_rate": 2.629680998613038e-06, "loss": 0.0021, "step": 10639 }, { "epoch": 7.370973328714929, "grad_norm": 0.2299732118844986, "learning_rate": 2.6289875173370324e-06, "loss": 0.0022, "step": 10640 }, { "epoch": 7.371666089366124, "grad_norm": 0.1310017853975296, "learning_rate": 2.6282940360610265e-06, "loss": 0.0037, "step": 10641 }, { "epoch": 7.372358850017319, "grad_norm": 0.11050750315189362, "learning_rate": 2.627600554785021e-06, "loss": 0.0017, "step": 10642 }, { "epoch": 7.373051610668514, "grad_norm": 0.08821465075016022, "learning_rate": 2.626907073509016e-06, "loss": 0.0017, "step": 10643 }, { "epoch": 7.3737443713197095, "grad_norm": 0.09382601827383041, "learning_rate": 2.62621359223301e-06, "loss": 0.0015, "step": 10644 }, { "epoch": 7.374437131970904, "grad_norm": 0.16105882823467255, "learning_rate": 2.6255201109570045e-06, "loss": 0.002, "step": 10645 }, { "epoch": 7.375129892622099, "grad_norm": 0.18233506381511688, "learning_rate": 2.6248266296809986e-06, "loss": 0.0021, "step": 10646 }, { "epoch": 7.3758226532732944, "grad_norm": 0.07966306805610657, "learning_rate": 2.624133148404993e-06, "loss": 0.0014, "step": 10647 }, { "epoch": 7.376515413924489, "grad_norm": 0.2737707793712616, "learning_rate": 2.6234396671289876e-06, "loss": 0.0018, "step": 10648 }, { "epoch": 7.377208174575684, "grad_norm": 0.12508131563663483, "learning_rate": 2.6227461858529825e-06, "loss": 0.0015, "step": 10649 }, { "epoch": 7.377900935226879, "grad_norm": 0.17339982092380524, "learning_rate": 2.6220527045769766e-06, "loss": 0.0021, "step": 10650 }, { "epoch": 7.378593695878074, "grad_norm": 0.21655355393886566, "learning_rate": 2.621359223300971e-06, "loss": 0.0016, "step": 10651 }, { "epoch": 7.379286456529269, "grad_norm": 0.2767361104488373, "learning_rate": 2.6206657420249652e-06, "loss": 0.0019, "step": 10652 }, { "epoch": 7.379979217180464, "grad_norm": 0.24723610281944275, "learning_rate": 2.6199722607489597e-06, "loss": 0.0019, "step": 10653 }, { "epoch": 7.380671977831659, "grad_norm": 0.1405622512102127, "learning_rate": 2.6192787794729547e-06, "loss": 0.0014, "step": 10654 }, { "epoch": 7.381364738482854, "grad_norm": 0.11951754242181778, "learning_rate": 2.6185852981969488e-06, "loss": 0.0018, "step": 10655 }, { "epoch": 7.382057499134049, "grad_norm": 0.13081449270248413, "learning_rate": 2.6178918169209433e-06, "loss": 0.0017, "step": 10656 }, { "epoch": 7.382750259785245, "grad_norm": 0.10164713114500046, "learning_rate": 2.6171983356449378e-06, "loss": 0.0016, "step": 10657 }, { "epoch": 7.383443020436439, "grad_norm": 0.1691260188817978, "learning_rate": 2.616504854368932e-06, "loss": 0.0022, "step": 10658 }, { "epoch": 7.384135781087634, "grad_norm": 0.08594096451997757, "learning_rate": 2.615811373092927e-06, "loss": 0.0016, "step": 10659 }, { "epoch": 7.3848285417388295, "grad_norm": 0.14206601679325104, "learning_rate": 2.6151178918169213e-06, "loss": 0.0018, "step": 10660 }, { "epoch": 7.385521302390024, "grad_norm": 0.07885897159576416, "learning_rate": 2.6144244105409154e-06, "loss": 0.0015, "step": 10661 }, { "epoch": 7.386214063041219, "grad_norm": 0.14833572506904602, "learning_rate": 2.61373092926491e-06, "loss": 0.0018, "step": 10662 }, { "epoch": 7.3869068236924145, "grad_norm": 0.2038232535123825, "learning_rate": 2.6130374479889044e-06, "loss": 0.0038, "step": 10663 }, { "epoch": 7.38759958434361, "grad_norm": 0.09282398223876953, "learning_rate": 2.6123439667128993e-06, "loss": 0.0018, "step": 10664 }, { "epoch": 7.388292344994804, "grad_norm": 0.17177100479602814, "learning_rate": 2.6116504854368934e-06, "loss": 0.0024, "step": 10665 }, { "epoch": 7.388985105645999, "grad_norm": 0.1776081919670105, "learning_rate": 2.610957004160888e-06, "loss": 0.0032, "step": 10666 }, { "epoch": 7.389677866297195, "grad_norm": 0.08193469792604446, "learning_rate": 2.610263522884882e-06, "loss": 0.0016, "step": 10667 }, { "epoch": 7.390370626948389, "grad_norm": 0.1997789740562439, "learning_rate": 2.6095700416088765e-06, "loss": 0.0028, "step": 10668 }, { "epoch": 7.391063387599584, "grad_norm": 0.3581722378730774, "learning_rate": 2.6088765603328715e-06, "loss": 0.0019, "step": 10669 }, { "epoch": 7.39175614825078, "grad_norm": 0.09195563197135925, "learning_rate": 2.6081830790568656e-06, "loss": 0.0017, "step": 10670 }, { "epoch": 7.392448908901974, "grad_norm": 0.27698156237602234, "learning_rate": 2.60748959778086e-06, "loss": 0.0017, "step": 10671 }, { "epoch": 7.393141669553169, "grad_norm": 0.1314757615327835, "learning_rate": 2.6067961165048546e-06, "loss": 0.0021, "step": 10672 }, { "epoch": 7.393834430204365, "grad_norm": 0.055855996906757355, "learning_rate": 2.6061026352288487e-06, "loss": 0.0013, "step": 10673 }, { "epoch": 7.394527190855559, "grad_norm": 0.18448542058467865, "learning_rate": 2.6054091539528436e-06, "loss": 0.0019, "step": 10674 }, { "epoch": 7.395219951506754, "grad_norm": 0.1572575569152832, "learning_rate": 2.604715672676838e-06, "loss": 0.0019, "step": 10675 }, { "epoch": 7.3959127121579495, "grad_norm": 0.22911396622657776, "learning_rate": 2.604022191400832e-06, "loss": 0.002, "step": 10676 }, { "epoch": 7.396605472809145, "grad_norm": 0.0508640892803669, "learning_rate": 2.6033287101248267e-06, "loss": 0.0013, "step": 10677 }, { "epoch": 7.397298233460339, "grad_norm": 0.1964741051197052, "learning_rate": 2.602635228848821e-06, "loss": 0.0023, "step": 10678 }, { "epoch": 7.3979909941115345, "grad_norm": 0.16124382615089417, "learning_rate": 2.6019417475728157e-06, "loss": 0.0016, "step": 10679 }, { "epoch": 7.39868375476273, "grad_norm": 0.22576114535331726, "learning_rate": 2.6012482662968102e-06, "loss": 0.0031, "step": 10680 }, { "epoch": 7.399376515413924, "grad_norm": 0.1622762680053711, "learning_rate": 2.6005547850208047e-06, "loss": 0.0018, "step": 10681 }, { "epoch": 7.400069276065119, "grad_norm": 0.18522429466247559, "learning_rate": 2.599861303744799e-06, "loss": 0.0019, "step": 10682 }, { "epoch": 7.400762036716315, "grad_norm": 0.07557988911867142, "learning_rate": 2.5991678224687933e-06, "loss": 0.0014, "step": 10683 }, { "epoch": 7.40145479736751, "grad_norm": 0.22076094150543213, "learning_rate": 2.5984743411927883e-06, "loss": 0.0021, "step": 10684 }, { "epoch": 7.402147558018704, "grad_norm": 0.2016923874616623, "learning_rate": 2.5977808599167824e-06, "loss": 0.0027, "step": 10685 }, { "epoch": 7.4028403186699, "grad_norm": 0.2840091288089752, "learning_rate": 2.597087378640777e-06, "loss": 0.0028, "step": 10686 }, { "epoch": 7.403533079321095, "grad_norm": 0.24551159143447876, "learning_rate": 2.5963938973647714e-06, "loss": 0.0034, "step": 10687 }, { "epoch": 7.404225839972289, "grad_norm": 0.28587213158607483, "learning_rate": 2.5957004160887655e-06, "loss": 0.002, "step": 10688 }, { "epoch": 7.404918600623485, "grad_norm": 0.17210809886455536, "learning_rate": 2.5950069348127604e-06, "loss": 0.0019, "step": 10689 }, { "epoch": 7.40561136127468, "grad_norm": 0.1567090004682541, "learning_rate": 2.594313453536755e-06, "loss": 0.0017, "step": 10690 }, { "epoch": 7.406304121925874, "grad_norm": 0.15413030982017517, "learning_rate": 2.593619972260749e-06, "loss": 0.0018, "step": 10691 }, { "epoch": 7.40699688257707, "grad_norm": 0.10482992231845856, "learning_rate": 2.5929264909847435e-06, "loss": 0.0015, "step": 10692 }, { "epoch": 7.407689643228265, "grad_norm": 0.13086524605751038, "learning_rate": 2.592233009708738e-06, "loss": 0.0019, "step": 10693 }, { "epoch": 7.408382403879459, "grad_norm": 0.07652492076158524, "learning_rate": 2.5915395284327325e-06, "loss": 0.0016, "step": 10694 }, { "epoch": 7.4090751645306545, "grad_norm": 0.07175049930810928, "learning_rate": 2.590846047156727e-06, "loss": 0.0014, "step": 10695 }, { "epoch": 7.40976792518185, "grad_norm": 0.12819047272205353, "learning_rate": 2.5901525658807215e-06, "loss": 0.0022, "step": 10696 }, { "epoch": 7.410460685833045, "grad_norm": 0.08351161330938339, "learning_rate": 2.5894590846047156e-06, "loss": 0.0013, "step": 10697 }, { "epoch": 7.4111534464842395, "grad_norm": 0.07833921164274216, "learning_rate": 2.58876560332871e-06, "loss": 0.0013, "step": 10698 }, { "epoch": 7.411846207135435, "grad_norm": 0.16410838067531586, "learning_rate": 2.588072122052705e-06, "loss": 0.0019, "step": 10699 }, { "epoch": 7.41253896778663, "grad_norm": 0.1312284618616104, "learning_rate": 2.587378640776699e-06, "loss": 0.0014, "step": 10700 }, { "epoch": 7.413231728437824, "grad_norm": 0.04965529218316078, "learning_rate": 2.5866851595006937e-06, "loss": 0.0011, "step": 10701 }, { "epoch": 7.41392448908902, "grad_norm": 0.06282943487167358, "learning_rate": 2.585991678224688e-06, "loss": 0.0013, "step": 10702 }, { "epoch": 7.414617249740215, "grad_norm": 0.08431761711835861, "learning_rate": 2.5852981969486823e-06, "loss": 0.0013, "step": 10703 }, { "epoch": 7.415310010391409, "grad_norm": 0.09277688711881638, "learning_rate": 2.584604715672677e-06, "loss": 0.0019, "step": 10704 }, { "epoch": 7.416002771042605, "grad_norm": 0.09888166934251785, "learning_rate": 2.5839112343966717e-06, "loss": 0.0017, "step": 10705 }, { "epoch": 7.4166955316938, "grad_norm": 0.06800324469804764, "learning_rate": 2.583217753120666e-06, "loss": 0.0015, "step": 10706 }, { "epoch": 7.417388292344995, "grad_norm": 0.1317177712917328, "learning_rate": 2.5825242718446603e-06, "loss": 0.0015, "step": 10707 }, { "epoch": 7.41808105299619, "grad_norm": 0.42098212242126465, "learning_rate": 2.5818307905686544e-06, "loss": 0.0023, "step": 10708 }, { "epoch": 7.418773813647385, "grad_norm": 0.09738583117723465, "learning_rate": 2.5811373092926493e-06, "loss": 0.0017, "step": 10709 }, { "epoch": 7.41946657429858, "grad_norm": 0.12297433614730835, "learning_rate": 2.580443828016644e-06, "loss": 0.0019, "step": 10710 }, { "epoch": 7.4201593349497745, "grad_norm": 0.15178140997886658, "learning_rate": 2.5797503467406383e-06, "loss": 0.0017, "step": 10711 }, { "epoch": 7.42085209560097, "grad_norm": 0.05053913965821266, "learning_rate": 2.5790568654646324e-06, "loss": 0.0011, "step": 10712 }, { "epoch": 7.421544856252165, "grad_norm": 0.07242614775896072, "learning_rate": 2.578363384188627e-06, "loss": 0.0013, "step": 10713 }, { "epoch": 7.4222376169033595, "grad_norm": 0.4279836118221283, "learning_rate": 2.577669902912622e-06, "loss": 0.002, "step": 10714 }, { "epoch": 7.422930377554555, "grad_norm": 0.13643436133861542, "learning_rate": 2.576976421636616e-06, "loss": 0.0018, "step": 10715 }, { "epoch": 7.42362313820575, "grad_norm": 0.08897361159324646, "learning_rate": 2.5762829403606105e-06, "loss": 0.0018, "step": 10716 }, { "epoch": 7.424315898856945, "grad_norm": 0.1289350539445877, "learning_rate": 2.575589459084605e-06, "loss": 0.0015, "step": 10717 }, { "epoch": 7.42500865950814, "grad_norm": 0.04847308248281479, "learning_rate": 2.574895977808599e-06, "loss": 0.0012, "step": 10718 }, { "epoch": 7.425701420159335, "grad_norm": 0.14715151488780975, "learning_rate": 2.574202496532594e-06, "loss": 0.002, "step": 10719 }, { "epoch": 7.42639418081053, "grad_norm": 0.2963087260723114, "learning_rate": 2.5735090152565885e-06, "loss": 0.0028, "step": 10720 }, { "epoch": 7.427086941461725, "grad_norm": 0.12237878143787384, "learning_rate": 2.5728155339805826e-06, "loss": 0.0021, "step": 10721 }, { "epoch": 7.42777970211292, "grad_norm": 0.08332429826259613, "learning_rate": 2.572122052704577e-06, "loss": 0.0015, "step": 10722 }, { "epoch": 7.428472462764115, "grad_norm": 0.07035665214061737, "learning_rate": 2.571428571428571e-06, "loss": 0.0014, "step": 10723 }, { "epoch": 7.42916522341531, "grad_norm": 0.06702131032943726, "learning_rate": 2.570735090152566e-06, "loss": 0.0015, "step": 10724 }, { "epoch": 7.429857984066505, "grad_norm": 0.18980415165424347, "learning_rate": 2.5700416088765606e-06, "loss": 0.0019, "step": 10725 }, { "epoch": 7.4305507447177, "grad_norm": 0.12252097576856613, "learning_rate": 2.569348127600555e-06, "loss": 0.0017, "step": 10726 }, { "epoch": 7.431243505368895, "grad_norm": 0.11813729256391525, "learning_rate": 2.5686546463245492e-06, "loss": 0.0016, "step": 10727 }, { "epoch": 7.43193626602009, "grad_norm": 0.056314866989851, "learning_rate": 2.5679611650485437e-06, "loss": 0.0012, "step": 10728 }, { "epoch": 7.432629026671285, "grad_norm": 0.10481024533510208, "learning_rate": 2.5672676837725387e-06, "loss": 0.0015, "step": 10729 }, { "epoch": 7.43332178732248, "grad_norm": 0.14865724742412567, "learning_rate": 2.5665742024965328e-06, "loss": 0.0025, "step": 10730 }, { "epoch": 7.434014547973675, "grad_norm": 0.08985798060894012, "learning_rate": 2.5658807212205273e-06, "loss": 0.0014, "step": 10731 }, { "epoch": 7.43470730862487, "grad_norm": 0.07113194465637207, "learning_rate": 2.5651872399445214e-06, "loss": 0.0014, "step": 10732 }, { "epoch": 7.435400069276065, "grad_norm": 0.11472031474113464, "learning_rate": 2.564493758668516e-06, "loss": 0.0021, "step": 10733 }, { "epoch": 7.43609282992726, "grad_norm": 0.12708251178264618, "learning_rate": 2.563800277392511e-06, "loss": 0.0017, "step": 10734 }, { "epoch": 7.436785590578455, "grad_norm": 0.196594700217247, "learning_rate": 2.5631067961165053e-06, "loss": 0.0034, "step": 10735 }, { "epoch": 7.43747835122965, "grad_norm": 0.10423848778009415, "learning_rate": 2.5624133148404994e-06, "loss": 0.0018, "step": 10736 }, { "epoch": 7.438171111880846, "grad_norm": 0.1535194218158722, "learning_rate": 2.561719833564494e-06, "loss": 0.0031, "step": 10737 }, { "epoch": 7.43886387253204, "grad_norm": 0.2090843766927719, "learning_rate": 2.561026352288488e-06, "loss": 0.0023, "step": 10738 }, { "epoch": 7.439556633183235, "grad_norm": 0.10467515885829926, "learning_rate": 2.560332871012483e-06, "loss": 0.0013, "step": 10739 }, { "epoch": 7.4402493938344305, "grad_norm": 0.35857662558555603, "learning_rate": 2.5596393897364774e-06, "loss": 0.0035, "step": 10740 }, { "epoch": 7.440942154485625, "grad_norm": 0.09160470962524414, "learning_rate": 2.558945908460472e-06, "loss": 0.0013, "step": 10741 }, { "epoch": 7.44163491513682, "grad_norm": 0.06767398118972778, "learning_rate": 2.558252427184466e-06, "loss": 0.0013, "step": 10742 }, { "epoch": 7.4423276757880155, "grad_norm": 0.08082219958305359, "learning_rate": 2.5575589459084605e-06, "loss": 0.0017, "step": 10743 }, { "epoch": 7.44302043643921, "grad_norm": 0.27682968974113464, "learning_rate": 2.5568654646324555e-06, "loss": 0.0024, "step": 10744 }, { "epoch": 7.443713197090405, "grad_norm": 0.11621539294719696, "learning_rate": 2.5561719833564496e-06, "loss": 0.0015, "step": 10745 }, { "epoch": 7.4444059577416, "grad_norm": 0.18691407144069672, "learning_rate": 2.555478502080444e-06, "loss": 0.0017, "step": 10746 }, { "epoch": 7.445098718392796, "grad_norm": 0.11532662063837051, "learning_rate": 2.554785020804438e-06, "loss": 0.0013, "step": 10747 }, { "epoch": 7.44579147904399, "grad_norm": 0.10876644402742386, "learning_rate": 2.5540915395284327e-06, "loss": 0.0015, "step": 10748 }, { "epoch": 7.446484239695185, "grad_norm": 0.1477528214454651, "learning_rate": 2.5533980582524276e-06, "loss": 0.0017, "step": 10749 }, { "epoch": 7.447177000346381, "grad_norm": 0.5255882143974304, "learning_rate": 2.552704576976422e-06, "loss": 0.0018, "step": 10750 }, { "epoch": 7.447869760997575, "grad_norm": 0.2798275351524353, "learning_rate": 2.552011095700416e-06, "loss": 0.0023, "step": 10751 }, { "epoch": 7.44856252164877, "grad_norm": 0.23782400786876678, "learning_rate": 2.5513176144244107e-06, "loss": 0.002, "step": 10752 }, { "epoch": 7.449255282299966, "grad_norm": 0.17771799862384796, "learning_rate": 2.550624133148405e-06, "loss": 0.0023, "step": 10753 }, { "epoch": 7.44994804295116, "grad_norm": 0.13188570737838745, "learning_rate": 2.5499306518723997e-06, "loss": 0.0018, "step": 10754 }, { "epoch": 7.450640803602355, "grad_norm": 0.1313145011663437, "learning_rate": 2.5492371705963942e-06, "loss": 0.0017, "step": 10755 }, { "epoch": 7.4513335642535505, "grad_norm": 0.11395426839590073, "learning_rate": 2.5485436893203887e-06, "loss": 0.0016, "step": 10756 }, { "epoch": 7.452026324904746, "grad_norm": 0.11718688905239105, "learning_rate": 2.547850208044383e-06, "loss": 0.0016, "step": 10757 }, { "epoch": 7.45271908555594, "grad_norm": 0.1720968782901764, "learning_rate": 2.5471567267683773e-06, "loss": 0.0023, "step": 10758 }, { "epoch": 7.4534118462071355, "grad_norm": 0.11114460974931717, "learning_rate": 2.5464632454923723e-06, "loss": 0.0019, "step": 10759 }, { "epoch": 7.454104606858331, "grad_norm": 0.10008231550455093, "learning_rate": 2.5457697642163664e-06, "loss": 0.0016, "step": 10760 }, { "epoch": 7.454797367509525, "grad_norm": 0.08406735211610794, "learning_rate": 2.545076282940361e-06, "loss": 0.0015, "step": 10761 }, { "epoch": 7.45549012816072, "grad_norm": 0.09855545312166214, "learning_rate": 2.544382801664355e-06, "loss": 0.0018, "step": 10762 }, { "epoch": 7.456182888811916, "grad_norm": 0.17305642366409302, "learning_rate": 2.5436893203883495e-06, "loss": 0.0018, "step": 10763 }, { "epoch": 7.45687564946311, "grad_norm": 0.09363764524459839, "learning_rate": 2.5429958391123444e-06, "loss": 0.0016, "step": 10764 }, { "epoch": 7.457568410114305, "grad_norm": 0.15001751482486725, "learning_rate": 2.542302357836339e-06, "loss": 0.0016, "step": 10765 }, { "epoch": 7.458261170765501, "grad_norm": 0.1612853854894638, "learning_rate": 2.541608876560333e-06, "loss": 0.002, "step": 10766 }, { "epoch": 7.458953931416696, "grad_norm": 0.14551077783107758, "learning_rate": 2.5409153952843275e-06, "loss": 0.0025, "step": 10767 }, { "epoch": 7.45964669206789, "grad_norm": 0.09144605696201324, "learning_rate": 2.5402219140083216e-06, "loss": 0.0016, "step": 10768 }, { "epoch": 7.460339452719086, "grad_norm": 0.1093723326921463, "learning_rate": 2.5395284327323165e-06, "loss": 0.0015, "step": 10769 }, { "epoch": 7.461032213370281, "grad_norm": 0.10207368433475494, "learning_rate": 2.538834951456311e-06, "loss": 0.0015, "step": 10770 }, { "epoch": 7.461724974021475, "grad_norm": 0.2664475739002228, "learning_rate": 2.538141470180305e-06, "loss": 0.0023, "step": 10771 }, { "epoch": 7.4624177346726706, "grad_norm": 0.45225033164024353, "learning_rate": 2.5374479889042996e-06, "loss": 0.0035, "step": 10772 }, { "epoch": 7.463110495323866, "grad_norm": 0.08390573412179947, "learning_rate": 2.536754507628294e-06, "loss": 0.0015, "step": 10773 }, { "epoch": 7.46380325597506, "grad_norm": 0.07127819955348969, "learning_rate": 2.536061026352289e-06, "loss": 0.0015, "step": 10774 }, { "epoch": 7.4644960166262555, "grad_norm": 0.13551568984985352, "learning_rate": 2.535367545076283e-06, "loss": 0.0016, "step": 10775 }, { "epoch": 7.465188777277451, "grad_norm": 0.10462245345115662, "learning_rate": 2.5346740638002777e-06, "loss": 0.0018, "step": 10776 }, { "epoch": 7.465881537928646, "grad_norm": 0.08148667216300964, "learning_rate": 2.5339805825242718e-06, "loss": 0.0013, "step": 10777 }, { "epoch": 7.4665742985798405, "grad_norm": 0.18152490258216858, "learning_rate": 2.5332871012482663e-06, "loss": 0.0019, "step": 10778 }, { "epoch": 7.467267059231036, "grad_norm": 0.10702257603406906, "learning_rate": 2.532593619972261e-06, "loss": 0.0016, "step": 10779 }, { "epoch": 7.467959819882231, "grad_norm": 0.13292056322097778, "learning_rate": 2.5319001386962557e-06, "loss": 0.002, "step": 10780 }, { "epoch": 7.468652580533425, "grad_norm": 0.07058043032884598, "learning_rate": 2.53120665742025e-06, "loss": 0.0014, "step": 10781 }, { "epoch": 7.469345341184621, "grad_norm": 0.08694568276405334, "learning_rate": 2.5305131761442443e-06, "loss": 0.0015, "step": 10782 }, { "epoch": 7.470038101835816, "grad_norm": 0.15851189196109772, "learning_rate": 2.5298196948682384e-06, "loss": 0.003, "step": 10783 }, { "epoch": 7.47073086248701, "grad_norm": 0.1388746052980423, "learning_rate": 2.5291262135922333e-06, "loss": 0.0017, "step": 10784 }, { "epoch": 7.471423623138206, "grad_norm": 0.1215035617351532, "learning_rate": 2.528432732316228e-06, "loss": 0.0016, "step": 10785 }, { "epoch": 7.472116383789401, "grad_norm": 0.14704327285289764, "learning_rate": 2.527739251040222e-06, "loss": 0.0019, "step": 10786 }, { "epoch": 7.472809144440596, "grad_norm": 0.1499715894460678, "learning_rate": 2.5270457697642164e-06, "loss": 0.0019, "step": 10787 }, { "epoch": 7.473501905091791, "grad_norm": 0.14678151905536652, "learning_rate": 2.526352288488211e-06, "loss": 0.002, "step": 10788 }, { "epoch": 7.474194665742986, "grad_norm": 0.35057759284973145, "learning_rate": 2.525658807212206e-06, "loss": 0.0022, "step": 10789 }, { "epoch": 7.474887426394181, "grad_norm": 0.14562055468559265, "learning_rate": 2.5249653259362e-06, "loss": 0.0033, "step": 10790 }, { "epoch": 7.4755801870453755, "grad_norm": 0.13831159472465515, "learning_rate": 2.5242718446601945e-06, "loss": 0.0017, "step": 10791 }, { "epoch": 7.476272947696571, "grad_norm": 0.10678946226835251, "learning_rate": 2.5235783633841886e-06, "loss": 0.0016, "step": 10792 }, { "epoch": 7.476965708347766, "grad_norm": 0.2228410840034485, "learning_rate": 2.522884882108183e-06, "loss": 0.0022, "step": 10793 }, { "epoch": 7.4776584689989605, "grad_norm": 0.24775110185146332, "learning_rate": 2.522191400832178e-06, "loss": 0.0022, "step": 10794 }, { "epoch": 7.478351229650156, "grad_norm": 0.18266801536083221, "learning_rate": 2.521497919556172e-06, "loss": 0.0021, "step": 10795 }, { "epoch": 7.479043990301351, "grad_norm": 0.06549399346113205, "learning_rate": 2.5208044382801666e-06, "loss": 0.0014, "step": 10796 }, { "epoch": 7.479736750952546, "grad_norm": 0.27793261408805847, "learning_rate": 2.520110957004161e-06, "loss": 0.0025, "step": 10797 }, { "epoch": 7.480429511603741, "grad_norm": 0.08995774388313293, "learning_rate": 2.519417475728155e-06, "loss": 0.0017, "step": 10798 }, { "epoch": 7.481122272254936, "grad_norm": 0.2409883588552475, "learning_rate": 2.51872399445215e-06, "loss": 0.0043, "step": 10799 }, { "epoch": 7.481815032906131, "grad_norm": 0.11554820835590363, "learning_rate": 2.5180305131761446e-06, "loss": 0.0024, "step": 10800 }, { "epoch": 7.482507793557326, "grad_norm": 0.17683421075344086, "learning_rate": 2.5173370319001387e-06, "loss": 0.0019, "step": 10801 }, { "epoch": 7.483200554208521, "grad_norm": 0.12466317415237427, "learning_rate": 2.5166435506241332e-06, "loss": 0.002, "step": 10802 }, { "epoch": 7.483893314859716, "grad_norm": 0.7196019291877747, "learning_rate": 2.5159500693481277e-06, "loss": 0.0017, "step": 10803 }, { "epoch": 7.484586075510911, "grad_norm": 0.690550684928894, "learning_rate": 2.5152565880721227e-06, "loss": 0.0018, "step": 10804 }, { "epoch": 7.485278836162106, "grad_norm": 0.12522132694721222, "learning_rate": 2.5145631067961168e-06, "loss": 0.002, "step": 10805 }, { "epoch": 7.485971596813301, "grad_norm": 0.33406364917755127, "learning_rate": 2.5138696255201113e-06, "loss": 0.0019, "step": 10806 }, { "epoch": 7.486664357464496, "grad_norm": 0.18372103571891785, "learning_rate": 2.5131761442441054e-06, "loss": 0.0022, "step": 10807 }, { "epoch": 7.487357118115691, "grad_norm": 0.20089584589004517, "learning_rate": 2.5124826629681e-06, "loss": 0.0016, "step": 10808 }, { "epoch": 7.488049878766886, "grad_norm": 0.4255248010158539, "learning_rate": 2.511789181692095e-06, "loss": 0.002, "step": 10809 }, { "epoch": 7.488742639418081, "grad_norm": 0.16915470361709595, "learning_rate": 2.511095700416089e-06, "loss": 0.0021, "step": 10810 }, { "epoch": 7.489435400069276, "grad_norm": 0.2463148981332779, "learning_rate": 2.5104022191400834e-06, "loss": 0.0016, "step": 10811 }, { "epoch": 7.490128160720471, "grad_norm": 0.20457732677459717, "learning_rate": 2.509708737864078e-06, "loss": 0.0019, "step": 10812 }, { "epoch": 7.490820921371666, "grad_norm": 0.14776448905467987, "learning_rate": 2.509015256588072e-06, "loss": 0.0018, "step": 10813 }, { "epoch": 7.491513682022861, "grad_norm": 0.1816481500864029, "learning_rate": 2.508321775312067e-06, "loss": 0.002, "step": 10814 }, { "epoch": 7.492206442674056, "grad_norm": 0.0913500189781189, "learning_rate": 2.5076282940360614e-06, "loss": 0.0016, "step": 10815 }, { "epoch": 7.492899203325251, "grad_norm": 0.14668625593185425, "learning_rate": 2.5069348127600555e-06, "loss": 0.002, "step": 10816 }, { "epoch": 7.493591963976446, "grad_norm": 0.2666305899620056, "learning_rate": 2.50624133148405e-06, "loss": 0.0022, "step": 10817 }, { "epoch": 7.494284724627641, "grad_norm": 0.14975956082344055, "learning_rate": 2.5055478502080445e-06, "loss": 0.0022, "step": 10818 }, { "epoch": 7.494977485278836, "grad_norm": 0.07446333765983582, "learning_rate": 2.504854368932039e-06, "loss": 0.0014, "step": 10819 }, { "epoch": 7.4956702459300315, "grad_norm": 0.10726433247327805, "learning_rate": 2.5041608876560336e-06, "loss": 0.0016, "step": 10820 }, { "epoch": 7.496363006581226, "grad_norm": 0.10890372842550278, "learning_rate": 2.503467406380028e-06, "loss": 0.0015, "step": 10821 }, { "epoch": 7.497055767232421, "grad_norm": 0.10032851994037628, "learning_rate": 2.502773925104022e-06, "loss": 0.0019, "step": 10822 }, { "epoch": 7.4977485278836165, "grad_norm": 0.12057121098041534, "learning_rate": 2.5020804438280167e-06, "loss": 0.0018, "step": 10823 }, { "epoch": 7.498441288534811, "grad_norm": 0.1493215560913086, "learning_rate": 2.5013869625520116e-06, "loss": 0.0017, "step": 10824 }, { "epoch": 7.499134049186006, "grad_norm": 0.07878365367650986, "learning_rate": 2.5006934812760057e-06, "loss": 0.0014, "step": 10825 }, { "epoch": 7.499826809837201, "grad_norm": 0.20616844296455383, "learning_rate": 2.5e-06, "loss": 0.002, "step": 10826 }, { "epoch": 7.500519570488397, "grad_norm": 0.30661019682884216, "learning_rate": 2.4993065187239947e-06, "loss": 0.0044, "step": 10827 }, { "epoch": 7.501212331139591, "grad_norm": 0.09549485146999359, "learning_rate": 2.4986130374479892e-06, "loss": 0.0016, "step": 10828 }, { "epoch": 7.501905091790786, "grad_norm": 0.1462438851594925, "learning_rate": 2.4979195561719833e-06, "loss": 0.0017, "step": 10829 }, { "epoch": 7.502597852441982, "grad_norm": 0.20211417973041534, "learning_rate": 2.4972260748959782e-06, "loss": 0.0018, "step": 10830 }, { "epoch": 7.503290613093176, "grad_norm": 0.5373084545135498, "learning_rate": 2.4965325936199723e-06, "loss": 0.003, "step": 10831 }, { "epoch": 7.503983373744371, "grad_norm": 0.09695852547883987, "learning_rate": 2.495839112343967e-06, "loss": 0.0019, "step": 10832 }, { "epoch": 7.504676134395567, "grad_norm": 0.12500806152820587, "learning_rate": 2.4951456310679614e-06, "loss": 0.0025, "step": 10833 }, { "epoch": 7.505368895046761, "grad_norm": 0.09857065975666046, "learning_rate": 2.494452149791956e-06, "loss": 0.0018, "step": 10834 }, { "epoch": 7.506061655697956, "grad_norm": 0.08579905331134796, "learning_rate": 2.4937586685159504e-06, "loss": 0.0016, "step": 10835 }, { "epoch": 7.5067544163491515, "grad_norm": 0.11580824106931686, "learning_rate": 2.493065187239945e-06, "loss": 0.0017, "step": 10836 }, { "epoch": 7.507447177000346, "grad_norm": 0.119056336581707, "learning_rate": 2.492371705963939e-06, "loss": 0.0025, "step": 10837 }, { "epoch": 7.508139937651541, "grad_norm": 0.18927937746047974, "learning_rate": 2.491678224687934e-06, "loss": 0.0017, "step": 10838 }, { "epoch": 7.5088326983027365, "grad_norm": 0.21251261234283447, "learning_rate": 2.490984743411928e-06, "loss": 0.0025, "step": 10839 }, { "epoch": 7.509525458953932, "grad_norm": 0.05846820026636124, "learning_rate": 2.4902912621359225e-06, "loss": 0.0013, "step": 10840 }, { "epoch": 7.510218219605126, "grad_norm": 0.06526561826467514, "learning_rate": 2.489597780859917e-06, "loss": 0.0013, "step": 10841 }, { "epoch": 7.510910980256321, "grad_norm": 0.14574207365512848, "learning_rate": 2.4889042995839115e-06, "loss": 0.0014, "step": 10842 }, { "epoch": 7.511603740907517, "grad_norm": 0.08202770352363586, "learning_rate": 2.488210818307906e-06, "loss": 0.0015, "step": 10843 }, { "epoch": 7.512296501558711, "grad_norm": 0.257617324590683, "learning_rate": 2.4875173370319e-06, "loss": 0.0026, "step": 10844 }, { "epoch": 7.512989262209906, "grad_norm": 0.16567140817642212, "learning_rate": 2.486823855755895e-06, "loss": 0.002, "step": 10845 }, { "epoch": 7.513682022861102, "grad_norm": 0.41710230708122253, "learning_rate": 2.486130374479889e-06, "loss": 0.0024, "step": 10846 }, { "epoch": 7.514374783512297, "grad_norm": 0.12412635236978531, "learning_rate": 2.4854368932038836e-06, "loss": 0.0021, "step": 10847 }, { "epoch": 7.515067544163491, "grad_norm": 0.09788189828395844, "learning_rate": 2.484743411927878e-06, "loss": 0.0019, "step": 10848 }, { "epoch": 7.515760304814687, "grad_norm": 0.07323741912841797, "learning_rate": 2.4840499306518727e-06, "loss": 0.0012, "step": 10849 }, { "epoch": 7.516453065465882, "grad_norm": 0.13336296379566193, "learning_rate": 2.483356449375867e-06, "loss": 0.0016, "step": 10850 }, { "epoch": 7.517145826117076, "grad_norm": 0.08538836240768433, "learning_rate": 2.4826629680998617e-06, "loss": 0.0014, "step": 10851 }, { "epoch": 7.5178385867682715, "grad_norm": 0.10788532346487045, "learning_rate": 2.4819694868238558e-06, "loss": 0.0016, "step": 10852 }, { "epoch": 7.518531347419467, "grad_norm": 0.11885540187358856, "learning_rate": 2.4812760055478503e-06, "loss": 0.0016, "step": 10853 }, { "epoch": 7.519224108070661, "grad_norm": 0.2749646008014679, "learning_rate": 2.4805825242718448e-06, "loss": 0.002, "step": 10854 }, { "epoch": 7.5199168687218565, "grad_norm": 0.11428909003734589, "learning_rate": 2.4798890429958393e-06, "loss": 0.0019, "step": 10855 }, { "epoch": 7.520609629373052, "grad_norm": 0.14908888936042786, "learning_rate": 2.479195561719834e-06, "loss": 0.0024, "step": 10856 }, { "epoch": 7.521302390024246, "grad_norm": 0.207083061337471, "learning_rate": 2.4785020804438283e-06, "loss": 0.0018, "step": 10857 }, { "epoch": 7.521995150675441, "grad_norm": 0.11959749460220337, "learning_rate": 2.477808599167823e-06, "loss": 0.0018, "step": 10858 }, { "epoch": 7.522687911326637, "grad_norm": 0.10812567174434662, "learning_rate": 2.477115117891817e-06, "loss": 0.0019, "step": 10859 }, { "epoch": 7.523380671977832, "grad_norm": 0.138017937541008, "learning_rate": 2.476421636615812e-06, "loss": 0.0018, "step": 10860 }, { "epoch": 7.524073432629026, "grad_norm": 0.38856616616249084, "learning_rate": 2.475728155339806e-06, "loss": 0.0024, "step": 10861 }, { "epoch": 7.524766193280222, "grad_norm": 0.140238419175148, "learning_rate": 2.4750346740638004e-06, "loss": 0.0019, "step": 10862 }, { "epoch": 7.525458953931417, "grad_norm": 0.09630647301673889, "learning_rate": 2.474341192787795e-06, "loss": 0.002, "step": 10863 }, { "epoch": 7.526151714582611, "grad_norm": 0.07080446183681488, "learning_rate": 2.4736477115117895e-06, "loss": 0.0016, "step": 10864 }, { "epoch": 7.526844475233807, "grad_norm": 0.14970698952674866, "learning_rate": 2.472954230235784e-06, "loss": 0.0024, "step": 10865 }, { "epoch": 7.527537235885002, "grad_norm": 0.0889345034956932, "learning_rate": 2.4722607489597785e-06, "loss": 0.0014, "step": 10866 }, { "epoch": 7.528229996536197, "grad_norm": 0.2585148513317108, "learning_rate": 2.4715672676837726e-06, "loss": 0.0028, "step": 10867 }, { "epoch": 7.528922757187392, "grad_norm": 0.11531859636306763, "learning_rate": 2.470873786407767e-06, "loss": 0.0018, "step": 10868 }, { "epoch": 7.529615517838587, "grad_norm": 0.1980704963207245, "learning_rate": 2.4701803051317616e-06, "loss": 0.0017, "step": 10869 }, { "epoch": 7.530308278489782, "grad_norm": 0.24060817062854767, "learning_rate": 2.469486823855756e-06, "loss": 0.0019, "step": 10870 }, { "epoch": 7.5310010391409765, "grad_norm": 0.12572532892227173, "learning_rate": 2.4687933425797506e-06, "loss": 0.0016, "step": 10871 }, { "epoch": 7.531693799792172, "grad_norm": 0.11596440523862839, "learning_rate": 2.4680998613037447e-06, "loss": 0.0018, "step": 10872 }, { "epoch": 7.532386560443367, "grad_norm": 0.3188724219799042, "learning_rate": 2.4674063800277396e-06, "loss": 0.0031, "step": 10873 }, { "epoch": 7.5330793210945615, "grad_norm": 0.05614947900176048, "learning_rate": 2.4667128987517337e-06, "loss": 0.0014, "step": 10874 }, { "epoch": 7.533772081745757, "grad_norm": 0.08537238091230392, "learning_rate": 2.4660194174757286e-06, "loss": 0.0018, "step": 10875 }, { "epoch": 7.534464842396952, "grad_norm": 0.30908671021461487, "learning_rate": 2.4653259361997227e-06, "loss": 0.0025, "step": 10876 }, { "epoch": 7.535157603048146, "grad_norm": 0.10444451123476028, "learning_rate": 2.4646324549237172e-06, "loss": 0.0017, "step": 10877 }, { "epoch": 7.535850363699342, "grad_norm": 0.3646766245365143, "learning_rate": 2.4639389736477118e-06, "loss": 0.0021, "step": 10878 }, { "epoch": 7.536543124350537, "grad_norm": 0.09241663664579391, "learning_rate": 2.4632454923717063e-06, "loss": 0.0015, "step": 10879 }, { "epoch": 7.537235885001732, "grad_norm": 0.061191245913505554, "learning_rate": 2.4625520110957008e-06, "loss": 0.0012, "step": 10880 }, { "epoch": 7.537928645652927, "grad_norm": 0.09165678918361664, "learning_rate": 2.4618585298196953e-06, "loss": 0.002, "step": 10881 }, { "epoch": 7.538621406304122, "grad_norm": 0.05164121091365814, "learning_rate": 2.4611650485436894e-06, "loss": 0.0013, "step": 10882 }, { "epoch": 7.539314166955317, "grad_norm": 0.10606106370687485, "learning_rate": 2.460471567267684e-06, "loss": 0.0018, "step": 10883 }, { "epoch": 7.540006927606512, "grad_norm": 0.20772795379161835, "learning_rate": 2.4597780859916784e-06, "loss": 0.0024, "step": 10884 }, { "epoch": 7.540699688257707, "grad_norm": 0.09767861664295197, "learning_rate": 2.459084604715673e-06, "loss": 0.0015, "step": 10885 }, { "epoch": 7.541392448908902, "grad_norm": 0.06639274209737778, "learning_rate": 2.4583911234396674e-06, "loss": 0.0013, "step": 10886 }, { "epoch": 7.542085209560097, "grad_norm": 0.07911796867847443, "learning_rate": 2.4576976421636615e-06, "loss": 0.0015, "step": 10887 }, { "epoch": 7.542777970211292, "grad_norm": 0.16360414028167725, "learning_rate": 2.4570041608876564e-06, "loss": 0.0021, "step": 10888 }, { "epoch": 7.543470730862487, "grad_norm": 0.1348550170660019, "learning_rate": 2.4563106796116505e-06, "loss": 0.0022, "step": 10889 }, { "epoch": 7.544163491513682, "grad_norm": 0.1356428563594818, "learning_rate": 2.4556171983356454e-06, "loss": 0.0017, "step": 10890 }, { "epoch": 7.544856252164877, "grad_norm": 0.08549313992261887, "learning_rate": 2.4549237170596395e-06, "loss": 0.0015, "step": 10891 }, { "epoch": 7.545549012816072, "grad_norm": 0.17838850617408752, "learning_rate": 2.454230235783634e-06, "loss": 0.0016, "step": 10892 }, { "epoch": 7.546241773467267, "grad_norm": 0.1340678632259369, "learning_rate": 2.4535367545076286e-06, "loss": 0.0019, "step": 10893 }, { "epoch": 7.546934534118462, "grad_norm": 0.06063477322459221, "learning_rate": 2.452843273231623e-06, "loss": 0.0012, "step": 10894 }, { "epoch": 7.547627294769657, "grad_norm": 0.3364317715167999, "learning_rate": 2.4521497919556176e-06, "loss": 0.0028, "step": 10895 }, { "epoch": 7.548320055420852, "grad_norm": 0.23646342754364014, "learning_rate": 2.4514563106796117e-06, "loss": 0.0017, "step": 10896 }, { "epoch": 7.549012816072047, "grad_norm": 0.25620752573013306, "learning_rate": 2.450762829403606e-06, "loss": 0.0029, "step": 10897 }, { "epoch": 7.549705576723242, "grad_norm": 0.5714073181152344, "learning_rate": 2.4500693481276007e-06, "loss": 0.003, "step": 10898 }, { "epoch": 7.550398337374437, "grad_norm": 0.14835543930530548, "learning_rate": 2.449375866851595e-06, "loss": 0.0017, "step": 10899 }, { "epoch": 7.5510910980256325, "grad_norm": 0.26132914423942566, "learning_rate": 2.4486823855755897e-06, "loss": 0.0025, "step": 10900 }, { "epoch": 7.551783858676827, "grad_norm": 0.4453752934932709, "learning_rate": 2.447988904299584e-06, "loss": 0.0034, "step": 10901 }, { "epoch": 7.552476619328022, "grad_norm": 0.15118376910686493, "learning_rate": 2.4472954230235783e-06, "loss": 0.0018, "step": 10902 }, { "epoch": 7.5531693799792174, "grad_norm": 0.11513940989971161, "learning_rate": 2.4466019417475732e-06, "loss": 0.0017, "step": 10903 }, { "epoch": 7.553862140630412, "grad_norm": 0.4208213984966278, "learning_rate": 2.4459084604715673e-06, "loss": 0.0028, "step": 10904 }, { "epoch": 7.554554901281607, "grad_norm": 0.06580349802970886, "learning_rate": 2.4452149791955623e-06, "loss": 0.0014, "step": 10905 }, { "epoch": 7.555247661932802, "grad_norm": 0.14686036109924316, "learning_rate": 2.4445214979195563e-06, "loss": 0.0015, "step": 10906 }, { "epoch": 7.555940422583998, "grad_norm": 0.22417961061000824, "learning_rate": 2.443828016643551e-06, "loss": 0.003, "step": 10907 }, { "epoch": 7.556633183235192, "grad_norm": 0.12621311843395233, "learning_rate": 2.4431345353675454e-06, "loss": 0.0023, "step": 10908 }, { "epoch": 7.557325943886387, "grad_norm": 0.1350223869085312, "learning_rate": 2.44244105409154e-06, "loss": 0.0018, "step": 10909 }, { "epoch": 7.558018704537583, "grad_norm": 0.2834029793739319, "learning_rate": 2.4417475728155344e-06, "loss": 0.0027, "step": 10910 }, { "epoch": 7.558711465188777, "grad_norm": 0.21382243931293488, "learning_rate": 2.4410540915395285e-06, "loss": 0.003, "step": 10911 }, { "epoch": 7.559404225839972, "grad_norm": 0.1368580311536789, "learning_rate": 2.440360610263523e-06, "loss": 0.0022, "step": 10912 }, { "epoch": 7.560096986491168, "grad_norm": 0.10384763777256012, "learning_rate": 2.4396671289875175e-06, "loss": 0.0016, "step": 10913 }, { "epoch": 7.560789747142362, "grad_norm": 0.28811296820640564, "learning_rate": 2.438973647711512e-06, "loss": 0.0025, "step": 10914 }, { "epoch": 7.561482507793557, "grad_norm": 0.160031259059906, "learning_rate": 2.4382801664355065e-06, "loss": 0.0018, "step": 10915 }, { "epoch": 7.5621752684447525, "grad_norm": 0.27531707286834717, "learning_rate": 2.437586685159501e-06, "loss": 0.0022, "step": 10916 }, { "epoch": 7.562868029095947, "grad_norm": 0.3410033881664276, "learning_rate": 2.436893203883495e-06, "loss": 0.0032, "step": 10917 }, { "epoch": 7.563560789747142, "grad_norm": 0.12519578635692596, "learning_rate": 2.43619972260749e-06, "loss": 0.0018, "step": 10918 }, { "epoch": 7.5642535503983375, "grad_norm": 0.1801953762769699, "learning_rate": 2.435506241331484e-06, "loss": 0.0017, "step": 10919 }, { "epoch": 7.564946311049533, "grad_norm": 0.07746082544326782, "learning_rate": 2.4348127600554786e-06, "loss": 0.0013, "step": 10920 }, { "epoch": 7.565639071700727, "grad_norm": 0.08023972064256668, "learning_rate": 2.434119278779473e-06, "loss": 0.0015, "step": 10921 }, { "epoch": 7.566331832351922, "grad_norm": 0.218221977353096, "learning_rate": 2.4334257975034676e-06, "loss": 0.0018, "step": 10922 }, { "epoch": 7.567024593003118, "grad_norm": 0.4734841585159302, "learning_rate": 2.432732316227462e-06, "loss": 0.0029, "step": 10923 }, { "epoch": 7.567717353654312, "grad_norm": 0.27004826068878174, "learning_rate": 2.4320388349514567e-06, "loss": 0.0024, "step": 10924 }, { "epoch": 7.568410114305507, "grad_norm": 0.17549893260002136, "learning_rate": 2.4313453536754508e-06, "loss": 0.0019, "step": 10925 }, { "epoch": 7.569102874956703, "grad_norm": 0.1410718560218811, "learning_rate": 2.4306518723994453e-06, "loss": 0.0016, "step": 10926 }, { "epoch": 7.569795635607898, "grad_norm": 0.0840911790728569, "learning_rate": 2.4299583911234398e-06, "loss": 0.0015, "step": 10927 }, { "epoch": 7.570488396259092, "grad_norm": 0.17833329737186432, "learning_rate": 2.4292649098474343e-06, "loss": 0.0025, "step": 10928 }, { "epoch": 7.571181156910288, "grad_norm": 0.1955181509256363, "learning_rate": 2.428571428571429e-06, "loss": 0.0024, "step": 10929 }, { "epoch": 7.571873917561483, "grad_norm": 0.2516569495201111, "learning_rate": 2.427877947295423e-06, "loss": 0.0027, "step": 10930 }, { "epoch": 7.572566678212677, "grad_norm": 0.1913844347000122, "learning_rate": 2.427184466019418e-06, "loss": 0.0026, "step": 10931 }, { "epoch": 7.5732594388638725, "grad_norm": 0.11958225071430206, "learning_rate": 2.426490984743412e-06, "loss": 0.0018, "step": 10932 }, { "epoch": 7.573952199515068, "grad_norm": 0.11717060208320618, "learning_rate": 2.425797503467407e-06, "loss": 0.0016, "step": 10933 }, { "epoch": 7.574644960166262, "grad_norm": 0.3873917758464813, "learning_rate": 2.425104022191401e-06, "loss": 0.002, "step": 10934 }, { "epoch": 7.5753377208174575, "grad_norm": 0.17179837822914124, "learning_rate": 2.4244105409153954e-06, "loss": 0.002, "step": 10935 }, { "epoch": 7.576030481468653, "grad_norm": 0.1312446892261505, "learning_rate": 2.42371705963939e-06, "loss": 0.0019, "step": 10936 }, { "epoch": 7.576723242119847, "grad_norm": 0.08735194057226181, "learning_rate": 2.4230235783633844e-06, "loss": 0.0014, "step": 10937 }, { "epoch": 7.577416002771042, "grad_norm": 0.3911072611808777, "learning_rate": 2.422330097087379e-06, "loss": 0.0021, "step": 10938 }, { "epoch": 7.578108763422238, "grad_norm": 0.14175733923912048, "learning_rate": 2.4216366158113735e-06, "loss": 0.003, "step": 10939 }, { "epoch": 7.578801524073433, "grad_norm": 0.2668286859989166, "learning_rate": 2.4209431345353676e-06, "loss": 0.0022, "step": 10940 }, { "epoch": 7.579494284724627, "grad_norm": 0.12190745025873184, "learning_rate": 2.420249653259362e-06, "loss": 0.0015, "step": 10941 }, { "epoch": 7.580187045375823, "grad_norm": 0.1163494661450386, "learning_rate": 2.4195561719833566e-06, "loss": 0.0015, "step": 10942 }, { "epoch": 7.580879806027018, "grad_norm": 0.10534106194972992, "learning_rate": 2.418862690707351e-06, "loss": 0.0019, "step": 10943 }, { "epoch": 7.581572566678212, "grad_norm": 0.13300900161266327, "learning_rate": 2.4181692094313456e-06, "loss": 0.0019, "step": 10944 }, { "epoch": 7.582265327329408, "grad_norm": 0.10963454842567444, "learning_rate": 2.4174757281553397e-06, "loss": 0.0016, "step": 10945 }, { "epoch": 7.582958087980603, "grad_norm": 1.2136335372924805, "learning_rate": 2.4167822468793346e-06, "loss": 0.0039, "step": 10946 }, { "epoch": 7.583650848631798, "grad_norm": 0.12151437252759933, "learning_rate": 2.4160887656033287e-06, "loss": 0.0019, "step": 10947 }, { "epoch": 7.584343609282993, "grad_norm": 0.09788265824317932, "learning_rate": 2.4153952843273236e-06, "loss": 0.0016, "step": 10948 }, { "epoch": 7.585036369934188, "grad_norm": 0.09877084195613861, "learning_rate": 2.4147018030513177e-06, "loss": 0.0016, "step": 10949 }, { "epoch": 7.585729130585383, "grad_norm": 0.06931530684232712, "learning_rate": 2.4140083217753122e-06, "loss": 0.0015, "step": 10950 }, { "epoch": 7.5864218912365775, "grad_norm": 0.16568750143051147, "learning_rate": 2.4133148404993067e-06, "loss": 0.0027, "step": 10951 }, { "epoch": 7.587114651887773, "grad_norm": 0.21498282253742218, "learning_rate": 2.4126213592233013e-06, "loss": 0.0019, "step": 10952 }, { "epoch": 7.587807412538968, "grad_norm": 0.236845925450325, "learning_rate": 2.4119278779472958e-06, "loss": 0.0015, "step": 10953 }, { "epoch": 7.5885001731901625, "grad_norm": 0.12705692648887634, "learning_rate": 2.41123439667129e-06, "loss": 0.0013, "step": 10954 }, { "epoch": 7.589192933841358, "grad_norm": 0.21156752109527588, "learning_rate": 2.4105409153952844e-06, "loss": 0.0019, "step": 10955 }, { "epoch": 7.589885694492553, "grad_norm": 0.15537095069885254, "learning_rate": 2.409847434119279e-06, "loss": 0.0018, "step": 10956 }, { "epoch": 7.590578455143747, "grad_norm": 0.22730201482772827, "learning_rate": 2.4091539528432734e-06, "loss": 0.0031, "step": 10957 }, { "epoch": 7.591271215794943, "grad_norm": 0.19640260934829712, "learning_rate": 2.408460471567268e-06, "loss": 0.0021, "step": 10958 }, { "epoch": 7.591963976446138, "grad_norm": 0.10736030340194702, "learning_rate": 2.4077669902912624e-06, "loss": 0.0019, "step": 10959 }, { "epoch": 7.592656737097333, "grad_norm": 0.07244516909122467, "learning_rate": 2.4070735090152565e-06, "loss": 0.0013, "step": 10960 }, { "epoch": 7.593349497748528, "grad_norm": 0.4546343982219696, "learning_rate": 2.4063800277392514e-06, "loss": 0.0028, "step": 10961 }, { "epoch": 7.594042258399723, "grad_norm": 0.13772660493850708, "learning_rate": 2.4056865464632455e-06, "loss": 0.0022, "step": 10962 }, { "epoch": 7.594735019050918, "grad_norm": 0.1508208066225052, "learning_rate": 2.4049930651872404e-06, "loss": 0.0018, "step": 10963 }, { "epoch": 7.595427779702113, "grad_norm": 0.21987207233905792, "learning_rate": 2.4042995839112345e-06, "loss": 0.002, "step": 10964 }, { "epoch": 7.596120540353308, "grad_norm": 0.12725728750228882, "learning_rate": 2.403606102635229e-06, "loss": 0.0017, "step": 10965 }, { "epoch": 7.596813301004503, "grad_norm": 0.11235237866640091, "learning_rate": 2.4029126213592235e-06, "loss": 0.0018, "step": 10966 }, { "epoch": 7.597506061655698, "grad_norm": 0.1461298167705536, "learning_rate": 2.402219140083218e-06, "loss": 0.0022, "step": 10967 }, { "epoch": 7.598198822306893, "grad_norm": 0.10823129862546921, "learning_rate": 2.4015256588072126e-06, "loss": 0.0017, "step": 10968 }, { "epoch": 7.598891582958088, "grad_norm": 0.2289755642414093, "learning_rate": 2.4008321775312066e-06, "loss": 0.0021, "step": 10969 }, { "epoch": 7.599584343609283, "grad_norm": 0.40313395857810974, "learning_rate": 2.400138696255201e-06, "loss": 0.0029, "step": 10970 }, { "epoch": 7.600277104260478, "grad_norm": 0.07987464964389801, "learning_rate": 2.3994452149791957e-06, "loss": 0.0016, "step": 10971 }, { "epoch": 7.600969864911673, "grad_norm": 0.09876357764005661, "learning_rate": 2.39875173370319e-06, "loss": 0.0017, "step": 10972 }, { "epoch": 7.601662625562868, "grad_norm": 0.08993715047836304, "learning_rate": 2.3980582524271847e-06, "loss": 0.0017, "step": 10973 }, { "epoch": 7.602355386214063, "grad_norm": 0.1425146609544754, "learning_rate": 2.397364771151179e-06, "loss": 0.0019, "step": 10974 }, { "epoch": 7.603048146865258, "grad_norm": 0.5991483926773071, "learning_rate": 2.3966712898751733e-06, "loss": 0.0032, "step": 10975 }, { "epoch": 7.603740907516453, "grad_norm": 0.11611495912075043, "learning_rate": 2.3959778085991682e-06, "loss": 0.0014, "step": 10976 }, { "epoch": 7.604433668167648, "grad_norm": 0.11563742905855179, "learning_rate": 2.3952843273231623e-06, "loss": 0.0021, "step": 10977 }, { "epoch": 7.605126428818843, "grad_norm": 0.15784800052642822, "learning_rate": 2.394590846047157e-06, "loss": 0.0017, "step": 10978 }, { "epoch": 7.605819189470038, "grad_norm": 0.12296920269727707, "learning_rate": 2.3938973647711513e-06, "loss": 0.0014, "step": 10979 }, { "epoch": 7.6065119501212335, "grad_norm": 0.2830219566822052, "learning_rate": 2.393203883495146e-06, "loss": 0.0018, "step": 10980 }, { "epoch": 7.607204710772428, "grad_norm": 0.11406715214252472, "learning_rate": 2.3925104022191403e-06, "loss": 0.0021, "step": 10981 }, { "epoch": 7.607897471423623, "grad_norm": 0.1440214216709137, "learning_rate": 2.391816920943135e-06, "loss": 0.0016, "step": 10982 }, { "epoch": 7.608590232074818, "grad_norm": 0.17547141015529633, "learning_rate": 2.3911234396671294e-06, "loss": 0.002, "step": 10983 }, { "epoch": 7.609282992726013, "grad_norm": 0.18593452870845795, "learning_rate": 2.3904299583911235e-06, "loss": 0.0019, "step": 10984 }, { "epoch": 7.609975753377208, "grad_norm": 0.07089821994304657, "learning_rate": 2.389736477115118e-06, "loss": 0.0014, "step": 10985 }, { "epoch": 7.610668514028403, "grad_norm": 0.10841097682714462, "learning_rate": 2.3890429958391125e-06, "loss": 0.0015, "step": 10986 }, { "epoch": 7.611361274679599, "grad_norm": 0.14363816380500793, "learning_rate": 2.388349514563107e-06, "loss": 0.0019, "step": 10987 }, { "epoch": 7.612054035330793, "grad_norm": 0.16981837153434753, "learning_rate": 2.3876560332871015e-06, "loss": 0.0019, "step": 10988 }, { "epoch": 7.612746795981988, "grad_norm": 0.22910691797733307, "learning_rate": 2.386962552011096e-06, "loss": 0.0026, "step": 10989 }, { "epoch": 7.613439556633184, "grad_norm": 0.2676429748535156, "learning_rate": 2.38626907073509e-06, "loss": 0.0018, "step": 10990 }, { "epoch": 7.614132317284378, "grad_norm": 0.09594810754060745, "learning_rate": 2.385575589459085e-06, "loss": 0.0017, "step": 10991 }, { "epoch": 7.614825077935573, "grad_norm": 0.16593818366527557, "learning_rate": 2.384882108183079e-06, "loss": 0.0029, "step": 10992 }, { "epoch": 7.615517838586769, "grad_norm": 0.07593372464179993, "learning_rate": 2.3841886269070736e-06, "loss": 0.0015, "step": 10993 }, { "epoch": 7.616210599237963, "grad_norm": 0.12001953274011612, "learning_rate": 2.383495145631068e-06, "loss": 0.0014, "step": 10994 }, { "epoch": 7.616903359889158, "grad_norm": 0.09538743644952774, "learning_rate": 2.3828016643550626e-06, "loss": 0.0018, "step": 10995 }, { "epoch": 7.6175961205403535, "grad_norm": 0.06194775551557541, "learning_rate": 2.382108183079057e-06, "loss": 0.0012, "step": 10996 }, { "epoch": 7.618288881191548, "grad_norm": 0.12936419248580933, "learning_rate": 2.3814147018030517e-06, "loss": 0.0014, "step": 10997 }, { "epoch": 7.618981641842743, "grad_norm": 0.08678889274597168, "learning_rate": 2.380721220527046e-06, "loss": 0.0016, "step": 10998 }, { "epoch": 7.6196744024939385, "grad_norm": 0.09448511898517609, "learning_rate": 2.3800277392510403e-06, "loss": 0.0015, "step": 10999 }, { "epoch": 7.620367163145134, "grad_norm": 0.2024964988231659, "learning_rate": 2.3793342579750348e-06, "loss": 0.0021, "step": 11000 }, { "epoch": 7.621059923796328, "grad_norm": 0.12435255944728851, "learning_rate": 2.3786407766990293e-06, "loss": 0.0019, "step": 11001 }, { "epoch": 7.621752684447523, "grad_norm": 0.20043282210826874, "learning_rate": 2.3779472954230238e-06, "loss": 0.0029, "step": 11002 }, { "epoch": 7.622445445098719, "grad_norm": 0.15249516069889069, "learning_rate": 2.3772538141470183e-06, "loss": 0.0024, "step": 11003 }, { "epoch": 7.623138205749913, "grad_norm": 0.19048525393009186, "learning_rate": 2.376560332871013e-06, "loss": 0.0018, "step": 11004 }, { "epoch": 7.623830966401108, "grad_norm": 0.1399148851633072, "learning_rate": 2.375866851595007e-06, "loss": 0.0017, "step": 11005 }, { "epoch": 7.624523727052304, "grad_norm": 0.3415045142173767, "learning_rate": 2.375173370319002e-06, "loss": 0.0044, "step": 11006 }, { "epoch": 7.625216487703499, "grad_norm": 0.05213839188218117, "learning_rate": 2.374479889042996e-06, "loss": 0.0011, "step": 11007 }, { "epoch": 7.625909248354693, "grad_norm": 0.1701638102531433, "learning_rate": 2.3737864077669904e-06, "loss": 0.0025, "step": 11008 }, { "epoch": 7.626602009005889, "grad_norm": 0.16347801685333252, "learning_rate": 2.373092926490985e-06, "loss": 0.0026, "step": 11009 }, { "epoch": 7.627294769657084, "grad_norm": 0.07998019456863403, "learning_rate": 2.3723994452149794e-06, "loss": 0.0015, "step": 11010 }, { "epoch": 7.627987530308278, "grad_norm": 0.11584939807653427, "learning_rate": 2.371705963938974e-06, "loss": 0.0017, "step": 11011 }, { "epoch": 7.6286802909594735, "grad_norm": 0.2052362859249115, "learning_rate": 2.371012482662968e-06, "loss": 0.0018, "step": 11012 }, { "epoch": 7.629373051610669, "grad_norm": 0.6242120265960693, "learning_rate": 2.370319001386963e-06, "loss": 0.0022, "step": 11013 }, { "epoch": 7.630065812261863, "grad_norm": 0.18045473098754883, "learning_rate": 2.369625520110957e-06, "loss": 0.0018, "step": 11014 }, { "epoch": 7.6307585729130585, "grad_norm": 0.3712784945964813, "learning_rate": 2.3689320388349516e-06, "loss": 0.0028, "step": 11015 }, { "epoch": 7.631451333564254, "grad_norm": 0.12799879908561707, "learning_rate": 2.368238557558946e-06, "loss": 0.0017, "step": 11016 }, { "epoch": 7.632144094215448, "grad_norm": 0.10460387915372849, "learning_rate": 2.3675450762829406e-06, "loss": 0.0017, "step": 11017 }, { "epoch": 7.632836854866643, "grad_norm": 0.13728325068950653, "learning_rate": 2.366851595006935e-06, "loss": 0.0021, "step": 11018 }, { "epoch": 7.633529615517839, "grad_norm": 0.23726387321949005, "learning_rate": 2.3661581137309296e-06, "loss": 0.0018, "step": 11019 }, { "epoch": 7.634222376169034, "grad_norm": 0.3002427816390991, "learning_rate": 2.3654646324549237e-06, "loss": 0.0022, "step": 11020 }, { "epoch": 7.634915136820228, "grad_norm": 0.18956208229064941, "learning_rate": 2.3647711511789186e-06, "loss": 0.0018, "step": 11021 }, { "epoch": 7.635607897471424, "grad_norm": 0.15530270338058472, "learning_rate": 2.3640776699029127e-06, "loss": 0.0015, "step": 11022 }, { "epoch": 7.636300658122619, "grad_norm": 0.14310528337955475, "learning_rate": 2.3633841886269072e-06, "loss": 0.0026, "step": 11023 }, { "epoch": 7.636993418773813, "grad_norm": 0.11291219294071198, "learning_rate": 2.3626907073509017e-06, "loss": 0.0016, "step": 11024 }, { "epoch": 7.637686179425009, "grad_norm": 0.12167590856552124, "learning_rate": 2.3619972260748962e-06, "loss": 0.0017, "step": 11025 }, { "epoch": 7.638378940076204, "grad_norm": 0.13032810389995575, "learning_rate": 2.3613037447988907e-06, "loss": 0.0014, "step": 11026 }, { "epoch": 7.639071700727399, "grad_norm": 0.15263308584690094, "learning_rate": 2.360610263522885e-06, "loss": 0.0018, "step": 11027 }, { "epoch": 7.6397644613785936, "grad_norm": 0.14499661326408386, "learning_rate": 2.3599167822468798e-06, "loss": 0.0017, "step": 11028 }, { "epoch": 7.640457222029789, "grad_norm": 0.1629236340522766, "learning_rate": 2.359223300970874e-06, "loss": 0.0024, "step": 11029 }, { "epoch": 7.641149982680984, "grad_norm": 0.23310644924640656, "learning_rate": 2.3585298196948684e-06, "loss": 0.0024, "step": 11030 }, { "epoch": 7.6418427433321785, "grad_norm": 0.26413801312446594, "learning_rate": 2.357836338418863e-06, "loss": 0.0019, "step": 11031 }, { "epoch": 7.642535503983374, "grad_norm": 0.1381988525390625, "learning_rate": 2.3571428571428574e-06, "loss": 0.0019, "step": 11032 }, { "epoch": 7.643228264634569, "grad_norm": 0.20399077236652374, "learning_rate": 2.356449375866852e-06, "loss": 0.0021, "step": 11033 }, { "epoch": 7.6439210252857634, "grad_norm": 0.134286567568779, "learning_rate": 2.3557558945908464e-06, "loss": 0.0019, "step": 11034 }, { "epoch": 7.644613785936959, "grad_norm": 0.15523633360862732, "learning_rate": 2.3550624133148405e-06, "loss": 0.0017, "step": 11035 }, { "epoch": 7.645306546588154, "grad_norm": 0.1477729082107544, "learning_rate": 2.354368932038835e-06, "loss": 0.0018, "step": 11036 }, { "epoch": 7.645999307239348, "grad_norm": 0.15880073606967926, "learning_rate": 2.3536754507628295e-06, "loss": 0.002, "step": 11037 }, { "epoch": 7.646692067890544, "grad_norm": 0.22143501043319702, "learning_rate": 2.352981969486824e-06, "loss": 0.0023, "step": 11038 }, { "epoch": 7.647384828541739, "grad_norm": 0.06278867274522781, "learning_rate": 2.3522884882108185e-06, "loss": 0.0013, "step": 11039 }, { "epoch": 7.648077589192933, "grad_norm": 0.0775415450334549, "learning_rate": 2.351595006934813e-06, "loss": 0.0014, "step": 11040 }, { "epoch": 7.648770349844129, "grad_norm": 0.31963226199150085, "learning_rate": 2.3509015256588075e-06, "loss": 0.0029, "step": 11041 }, { "epoch": 7.649463110495324, "grad_norm": 0.07944449782371521, "learning_rate": 2.3502080443828016e-06, "loss": 0.0015, "step": 11042 }, { "epoch": 7.650155871146519, "grad_norm": 0.06306667625904083, "learning_rate": 2.3495145631067966e-06, "loss": 0.0014, "step": 11043 }, { "epoch": 7.650848631797714, "grad_norm": 0.13178983330726624, "learning_rate": 2.3488210818307907e-06, "loss": 0.0015, "step": 11044 }, { "epoch": 7.651541392448909, "grad_norm": 0.09785177558660507, "learning_rate": 2.348127600554785e-06, "loss": 0.0017, "step": 11045 }, { "epoch": 7.652234153100104, "grad_norm": 0.235648974776268, "learning_rate": 2.3474341192787797e-06, "loss": 0.0038, "step": 11046 }, { "epoch": 7.652926913751299, "grad_norm": 0.0811808779835701, "learning_rate": 2.346740638002774e-06, "loss": 0.0015, "step": 11047 }, { "epoch": 7.653619674402494, "grad_norm": 0.2671070992946625, "learning_rate": 2.3460471567267687e-06, "loss": 0.0032, "step": 11048 }, { "epoch": 7.654312435053689, "grad_norm": 0.06746388226747513, "learning_rate": 2.345353675450763e-06, "loss": 0.0013, "step": 11049 }, { "epoch": 7.655005195704884, "grad_norm": 0.1766207218170166, "learning_rate": 2.3446601941747573e-06, "loss": 0.0026, "step": 11050 }, { "epoch": 7.655697956356079, "grad_norm": 0.095545694231987, "learning_rate": 2.343966712898752e-06, "loss": 0.0013, "step": 11051 }, { "epoch": 7.656390717007274, "grad_norm": 0.14303840696811676, "learning_rate": 2.3432732316227463e-06, "loss": 0.0025, "step": 11052 }, { "epoch": 7.657083477658469, "grad_norm": 0.09510090947151184, "learning_rate": 2.342579750346741e-06, "loss": 0.0016, "step": 11053 }, { "epoch": 7.657776238309664, "grad_norm": 0.10317273437976837, "learning_rate": 2.3418862690707353e-06, "loss": 0.0016, "step": 11054 }, { "epoch": 7.658468998960859, "grad_norm": 0.14392735064029694, "learning_rate": 2.34119278779473e-06, "loss": 0.0018, "step": 11055 }, { "epoch": 7.659161759612054, "grad_norm": 0.13661417365074158, "learning_rate": 2.3404993065187244e-06, "loss": 0.002, "step": 11056 }, { "epoch": 7.659854520263249, "grad_norm": 0.5133945345878601, "learning_rate": 2.3398058252427184e-06, "loss": 0.0021, "step": 11057 }, { "epoch": 7.660547280914444, "grad_norm": 0.16885030269622803, "learning_rate": 2.3391123439667134e-06, "loss": 0.0034, "step": 11058 }, { "epoch": 7.661240041565639, "grad_norm": 0.17294436693191528, "learning_rate": 2.3384188626907075e-06, "loss": 0.0025, "step": 11059 }, { "epoch": 7.661932802216834, "grad_norm": 0.1565319448709488, "learning_rate": 2.337725381414702e-06, "loss": 0.002, "step": 11060 }, { "epoch": 7.662625562868029, "grad_norm": 0.19181755185127258, "learning_rate": 2.3370319001386965e-06, "loss": 0.0022, "step": 11061 }, { "epoch": 7.663318323519224, "grad_norm": 0.13065962493419647, "learning_rate": 2.336338418862691e-06, "loss": 0.0034, "step": 11062 }, { "epoch": 7.664011084170419, "grad_norm": 0.14209194481372833, "learning_rate": 2.3356449375866855e-06, "loss": 0.0021, "step": 11063 }, { "epoch": 7.664703844821614, "grad_norm": 0.06735049933195114, "learning_rate": 2.33495145631068e-06, "loss": 0.0013, "step": 11064 }, { "epoch": 7.665396605472809, "grad_norm": 0.17228339612483978, "learning_rate": 2.334257975034674e-06, "loss": 0.002, "step": 11065 }, { "epoch": 7.666089366124004, "grad_norm": 0.25487828254699707, "learning_rate": 2.3335644937586686e-06, "loss": 0.0023, "step": 11066 }, { "epoch": 7.6667821267752, "grad_norm": 0.11353092640638351, "learning_rate": 2.332871012482663e-06, "loss": 0.0021, "step": 11067 }, { "epoch": 7.667474887426394, "grad_norm": 0.11350497603416443, "learning_rate": 2.3321775312066576e-06, "loss": 0.0015, "step": 11068 }, { "epoch": 7.668167648077589, "grad_norm": 0.13392123579978943, "learning_rate": 2.331484049930652e-06, "loss": 0.0014, "step": 11069 }, { "epoch": 7.668860408728785, "grad_norm": 0.1440533995628357, "learning_rate": 2.3307905686546462e-06, "loss": 0.0023, "step": 11070 }, { "epoch": 7.669553169379979, "grad_norm": 0.6629919409751892, "learning_rate": 2.330097087378641e-06, "loss": 0.0022, "step": 11071 }, { "epoch": 7.670245930031174, "grad_norm": 0.06951063126325607, "learning_rate": 2.3294036061026352e-06, "loss": 0.0015, "step": 11072 }, { "epoch": 7.6709386906823696, "grad_norm": 0.14044804871082306, "learning_rate": 2.32871012482663e-06, "loss": 0.0022, "step": 11073 }, { "epoch": 7.671631451333564, "grad_norm": 0.1531040072441101, "learning_rate": 2.3280166435506243e-06, "loss": 0.0016, "step": 11074 }, { "epoch": 7.672324211984759, "grad_norm": 0.180302694439888, "learning_rate": 2.3273231622746188e-06, "loss": 0.0018, "step": 11075 }, { "epoch": 7.6730169726359545, "grad_norm": 0.17087845504283905, "learning_rate": 2.3266296809986133e-06, "loss": 0.0023, "step": 11076 }, { "epoch": 7.673709733287149, "grad_norm": 0.44742295145988464, "learning_rate": 2.3259361997226078e-06, "loss": 0.0031, "step": 11077 }, { "epoch": 7.674402493938344, "grad_norm": 0.18731926381587982, "learning_rate": 2.3252427184466023e-06, "loss": 0.0033, "step": 11078 }, { "epoch": 7.6750952545895395, "grad_norm": 0.24815206229686737, "learning_rate": 2.324549237170597e-06, "loss": 0.0026, "step": 11079 }, { "epoch": 7.675788015240734, "grad_norm": 0.08992894738912582, "learning_rate": 2.323855755894591e-06, "loss": 0.0014, "step": 11080 }, { "epoch": 7.676480775891929, "grad_norm": 0.11693129688501358, "learning_rate": 2.3231622746185854e-06, "loss": 0.0021, "step": 11081 }, { "epoch": 7.677173536543124, "grad_norm": 0.10862977802753448, "learning_rate": 2.32246879334258e-06, "loss": 0.0017, "step": 11082 }, { "epoch": 7.67786629719432, "grad_norm": 0.07881202548742294, "learning_rate": 2.3217753120665744e-06, "loss": 0.0015, "step": 11083 }, { "epoch": 7.678559057845514, "grad_norm": 0.09780140966176987, "learning_rate": 2.321081830790569e-06, "loss": 0.0019, "step": 11084 }, { "epoch": 7.679251818496709, "grad_norm": 0.09575121849775314, "learning_rate": 2.320388349514563e-06, "loss": 0.0018, "step": 11085 }, { "epoch": 7.679944579147905, "grad_norm": 0.33804455399513245, "learning_rate": 2.319694868238558e-06, "loss": 0.0018, "step": 11086 }, { "epoch": 7.6806373397991, "grad_norm": 0.4046388268470764, "learning_rate": 2.319001386962552e-06, "loss": 0.0022, "step": 11087 }, { "epoch": 7.681330100450294, "grad_norm": 0.11567312479019165, "learning_rate": 2.318307905686547e-06, "loss": 0.0019, "step": 11088 }, { "epoch": 7.68202286110149, "grad_norm": 0.10589355230331421, "learning_rate": 2.317614424410541e-06, "loss": 0.0016, "step": 11089 }, { "epoch": 7.682715621752685, "grad_norm": 0.34544435143470764, "learning_rate": 2.3169209431345356e-06, "loss": 0.0029, "step": 11090 }, { "epoch": 7.683408382403879, "grad_norm": 0.14481580257415771, "learning_rate": 2.31622746185853e-06, "loss": 0.0019, "step": 11091 }, { "epoch": 7.6841011430550745, "grad_norm": 0.12427419424057007, "learning_rate": 2.3155339805825246e-06, "loss": 0.0021, "step": 11092 }, { "epoch": 7.68479390370627, "grad_norm": 0.11880215257406235, "learning_rate": 2.314840499306519e-06, "loss": 0.0013, "step": 11093 }, { "epoch": 7.685486664357464, "grad_norm": 0.1104602962732315, "learning_rate": 2.314147018030513e-06, "loss": 0.0017, "step": 11094 }, { "epoch": 7.6861794250086595, "grad_norm": 0.11313404887914658, "learning_rate": 2.3134535367545077e-06, "loss": 0.0015, "step": 11095 }, { "epoch": 7.686872185659855, "grad_norm": 0.1580561250448227, "learning_rate": 2.312760055478502e-06, "loss": 0.0018, "step": 11096 }, { "epoch": 7.687564946311049, "grad_norm": 0.0821509137749672, "learning_rate": 2.3120665742024967e-06, "loss": 0.0019, "step": 11097 }, { "epoch": 7.688257706962244, "grad_norm": 0.32258695363998413, "learning_rate": 2.3113730929264912e-06, "loss": 0.0021, "step": 11098 }, { "epoch": 7.68895046761344, "grad_norm": 0.1450721025466919, "learning_rate": 2.3106796116504857e-06, "loss": 0.0016, "step": 11099 }, { "epoch": 7.689643228264634, "grad_norm": 0.37808188796043396, "learning_rate": 2.30998613037448e-06, "loss": 0.0057, "step": 11100 }, { "epoch": 7.690335988915829, "grad_norm": 0.09213859587907791, "learning_rate": 2.3092926490984748e-06, "loss": 0.0019, "step": 11101 }, { "epoch": 7.691028749567025, "grad_norm": 0.08755435794591904, "learning_rate": 2.308599167822469e-06, "loss": 0.0017, "step": 11102 }, { "epoch": 7.69172151021822, "grad_norm": 0.28027504682540894, "learning_rate": 2.3079056865464634e-06, "loss": 0.0017, "step": 11103 }, { "epoch": 7.692414270869414, "grad_norm": 0.41618475317955017, "learning_rate": 2.307212205270458e-06, "loss": 0.0018, "step": 11104 }, { "epoch": 7.69310703152061, "grad_norm": 0.33685627579689026, "learning_rate": 2.3065187239944524e-06, "loss": 0.002, "step": 11105 }, { "epoch": 7.693799792171805, "grad_norm": 0.18762889504432678, "learning_rate": 2.305825242718447e-06, "loss": 0.0016, "step": 11106 }, { "epoch": 7.694492552822999, "grad_norm": 0.10574682801961899, "learning_rate": 2.3051317614424414e-06, "loss": 0.0018, "step": 11107 }, { "epoch": 7.6951853134741945, "grad_norm": 0.12032246589660645, "learning_rate": 2.3044382801664355e-06, "loss": 0.0015, "step": 11108 }, { "epoch": 7.69587807412539, "grad_norm": 0.1229749470949173, "learning_rate": 2.30374479889043e-06, "loss": 0.0019, "step": 11109 }, { "epoch": 7.696570834776585, "grad_norm": 0.17300957441329956, "learning_rate": 2.3030513176144245e-06, "loss": 0.0019, "step": 11110 }, { "epoch": 7.6972635954277795, "grad_norm": 0.09111493080854416, "learning_rate": 2.302357836338419e-06, "loss": 0.0017, "step": 11111 }, { "epoch": 7.697956356078975, "grad_norm": 0.3462311029434204, "learning_rate": 2.3016643550624135e-06, "loss": 0.0024, "step": 11112 }, { "epoch": 7.69864911673017, "grad_norm": 0.14243848621845245, "learning_rate": 2.300970873786408e-06, "loss": 0.0017, "step": 11113 }, { "epoch": 7.699341877381364, "grad_norm": 0.07458194345235825, "learning_rate": 2.3002773925104025e-06, "loss": 0.0017, "step": 11114 }, { "epoch": 7.70003463803256, "grad_norm": 0.12619180977344513, "learning_rate": 2.2995839112343966e-06, "loss": 0.0017, "step": 11115 }, { "epoch": 7.700727398683755, "grad_norm": 0.14020708203315735, "learning_rate": 2.2988904299583916e-06, "loss": 0.0034, "step": 11116 }, { "epoch": 7.701420159334949, "grad_norm": 0.10561706870794296, "learning_rate": 2.2981969486823856e-06, "loss": 0.0019, "step": 11117 }, { "epoch": 7.702112919986145, "grad_norm": 0.1736873984336853, "learning_rate": 2.29750346740638e-06, "loss": 0.0019, "step": 11118 }, { "epoch": 7.70280568063734, "grad_norm": 0.12339480966329575, "learning_rate": 2.2968099861303747e-06, "loss": 0.0019, "step": 11119 }, { "epoch": 7.703498441288534, "grad_norm": 0.1351640671491623, "learning_rate": 2.296116504854369e-06, "loss": 0.0017, "step": 11120 }, { "epoch": 7.70419120193973, "grad_norm": 0.15358518064022064, "learning_rate": 2.2954230235783637e-06, "loss": 0.002, "step": 11121 }, { "epoch": 7.704883962590925, "grad_norm": 0.24224039912223816, "learning_rate": 2.294729542302358e-06, "loss": 0.003, "step": 11122 }, { "epoch": 7.70557672324212, "grad_norm": 0.1176552027463913, "learning_rate": 2.2940360610263523e-06, "loss": 0.0017, "step": 11123 }, { "epoch": 7.706269483893315, "grad_norm": 0.2175241857767105, "learning_rate": 2.293342579750347e-06, "loss": 0.0027, "step": 11124 }, { "epoch": 7.70696224454451, "grad_norm": 0.07370300590991974, "learning_rate": 2.2926490984743413e-06, "loss": 0.0014, "step": 11125 }, { "epoch": 7.707655005195705, "grad_norm": 0.0793105959892273, "learning_rate": 2.291955617198336e-06, "loss": 0.0013, "step": 11126 }, { "epoch": 7.7083477658468995, "grad_norm": 0.12147712707519531, "learning_rate": 2.2912621359223303e-06, "loss": 0.0021, "step": 11127 }, { "epoch": 7.709040526498095, "grad_norm": 0.33362194895744324, "learning_rate": 2.2905686546463244e-06, "loss": 0.002, "step": 11128 }, { "epoch": 7.70973328714929, "grad_norm": 0.05313370004296303, "learning_rate": 2.2898751733703193e-06, "loss": 0.0014, "step": 11129 }, { "epoch": 7.710426047800485, "grad_norm": 0.08593767136335373, "learning_rate": 2.2891816920943134e-06, "loss": 0.0015, "step": 11130 }, { "epoch": 7.71111880845168, "grad_norm": 0.12806203961372375, "learning_rate": 2.2884882108183084e-06, "loss": 0.0019, "step": 11131 }, { "epoch": 7.711811569102875, "grad_norm": 0.13096466660499573, "learning_rate": 2.2877947295423024e-06, "loss": 0.0016, "step": 11132 }, { "epoch": 7.71250432975407, "grad_norm": 0.18911923468112946, "learning_rate": 2.287101248266297e-06, "loss": 0.0024, "step": 11133 }, { "epoch": 7.713197090405265, "grad_norm": 0.08166119456291199, "learning_rate": 2.2864077669902915e-06, "loss": 0.0015, "step": 11134 }, { "epoch": 7.71388985105646, "grad_norm": 0.07480023056268692, "learning_rate": 2.285714285714286e-06, "loss": 0.0015, "step": 11135 }, { "epoch": 7.714582611707655, "grad_norm": 0.11152496933937073, "learning_rate": 2.2850208044382805e-06, "loss": 0.0017, "step": 11136 }, { "epoch": 7.71527537235885, "grad_norm": 0.16495485603809357, "learning_rate": 2.284327323162275e-06, "loss": 0.0024, "step": 11137 }, { "epoch": 7.715968133010045, "grad_norm": 0.4652247130870819, "learning_rate": 2.283633841886269e-06, "loss": 0.0034, "step": 11138 }, { "epoch": 7.71666089366124, "grad_norm": 0.19577790796756744, "learning_rate": 2.2829403606102636e-06, "loss": 0.0016, "step": 11139 }, { "epoch": 7.717353654312435, "grad_norm": 0.25848260521888733, "learning_rate": 2.282246879334258e-06, "loss": 0.0017, "step": 11140 }, { "epoch": 7.71804641496363, "grad_norm": 0.13725721836090088, "learning_rate": 2.2815533980582526e-06, "loss": 0.0018, "step": 11141 }, { "epoch": 7.718739175614825, "grad_norm": 0.1336439996957779, "learning_rate": 2.280859916782247e-06, "loss": 0.0015, "step": 11142 }, { "epoch": 7.71943193626602, "grad_norm": 0.15603455901145935, "learning_rate": 2.280166435506241e-06, "loss": 0.002, "step": 11143 }, { "epoch": 7.720124696917215, "grad_norm": 0.10399144142866135, "learning_rate": 2.279472954230236e-06, "loss": 0.0015, "step": 11144 }, { "epoch": 7.72081745756841, "grad_norm": 0.10489049553871155, "learning_rate": 2.2787794729542302e-06, "loss": 0.0022, "step": 11145 }, { "epoch": 7.721510218219605, "grad_norm": 0.19018907845020294, "learning_rate": 2.278085991678225e-06, "loss": 0.0027, "step": 11146 }, { "epoch": 7.7222029788708, "grad_norm": 0.0948394238948822, "learning_rate": 2.2773925104022192e-06, "loss": 0.0019, "step": 11147 }, { "epoch": 7.722895739521995, "grad_norm": 0.18266138434410095, "learning_rate": 2.2766990291262138e-06, "loss": 0.0019, "step": 11148 }, { "epoch": 7.72358850017319, "grad_norm": 0.06652650237083435, "learning_rate": 2.2760055478502083e-06, "loss": 0.0014, "step": 11149 }, { "epoch": 7.724281260824386, "grad_norm": 0.14933249354362488, "learning_rate": 2.2753120665742028e-06, "loss": 0.0017, "step": 11150 }, { "epoch": 7.72497402147558, "grad_norm": 0.062310583889484406, "learning_rate": 2.2746185852981973e-06, "loss": 0.0012, "step": 11151 }, { "epoch": 7.725666782126775, "grad_norm": 0.09098254144191742, "learning_rate": 2.2739251040221914e-06, "loss": 0.0017, "step": 11152 }, { "epoch": 7.7263595427779705, "grad_norm": 0.11707434058189392, "learning_rate": 2.273231622746186e-06, "loss": 0.0018, "step": 11153 }, { "epoch": 7.727052303429165, "grad_norm": 0.06647341698408127, "learning_rate": 2.2725381414701804e-06, "loss": 0.0014, "step": 11154 }, { "epoch": 7.72774506408036, "grad_norm": 0.27745726704597473, "learning_rate": 2.271844660194175e-06, "loss": 0.0022, "step": 11155 }, { "epoch": 7.7284378247315555, "grad_norm": 0.19227567315101624, "learning_rate": 2.2711511789181694e-06, "loss": 0.0017, "step": 11156 }, { "epoch": 7.72913058538275, "grad_norm": 0.42923104763031006, "learning_rate": 2.270457697642164e-06, "loss": 0.0024, "step": 11157 }, { "epoch": 7.729823346033945, "grad_norm": 0.1012384220957756, "learning_rate": 2.269764216366158e-06, "loss": 0.0018, "step": 11158 }, { "epoch": 7.73051610668514, "grad_norm": 0.22178469598293304, "learning_rate": 2.269070735090153e-06, "loss": 0.0022, "step": 11159 }, { "epoch": 7.731208867336335, "grad_norm": 0.11777793616056442, "learning_rate": 2.268377253814147e-06, "loss": 0.0017, "step": 11160 }, { "epoch": 7.73190162798753, "grad_norm": 0.42206209897994995, "learning_rate": 2.267683772538142e-06, "loss": 0.0016, "step": 11161 }, { "epoch": 7.732594388638725, "grad_norm": 0.0868501365184784, "learning_rate": 2.266990291262136e-06, "loss": 0.0015, "step": 11162 }, { "epoch": 7.733287149289921, "grad_norm": 0.1242530569434166, "learning_rate": 2.2662968099861306e-06, "loss": 0.0016, "step": 11163 }, { "epoch": 7.733979909941115, "grad_norm": 0.22645629942417145, "learning_rate": 2.265603328710125e-06, "loss": 0.0022, "step": 11164 }, { "epoch": 7.73467267059231, "grad_norm": 0.09080668538808823, "learning_rate": 2.2649098474341196e-06, "loss": 0.0016, "step": 11165 }, { "epoch": 7.735365431243506, "grad_norm": 0.14278453588485718, "learning_rate": 2.264216366158114e-06, "loss": 0.0022, "step": 11166 }, { "epoch": 7.7360581918947, "grad_norm": 0.21942207217216492, "learning_rate": 2.263522884882108e-06, "loss": 0.0024, "step": 11167 }, { "epoch": 7.736750952545895, "grad_norm": 0.07790365070104599, "learning_rate": 2.2628294036061027e-06, "loss": 0.0014, "step": 11168 }, { "epoch": 7.737443713197091, "grad_norm": 0.14562489092350006, "learning_rate": 2.262135922330097e-06, "loss": 0.0023, "step": 11169 }, { "epoch": 7.738136473848286, "grad_norm": 0.12146992236375809, "learning_rate": 2.2614424410540917e-06, "loss": 0.0018, "step": 11170 }, { "epoch": 7.73882923449948, "grad_norm": 0.10213583707809448, "learning_rate": 2.2607489597780862e-06, "loss": 0.0015, "step": 11171 }, { "epoch": 7.7395219951506755, "grad_norm": 0.2260013222694397, "learning_rate": 2.2600554785020807e-06, "loss": 0.0033, "step": 11172 }, { "epoch": 7.740214755801871, "grad_norm": 0.18813388049602509, "learning_rate": 2.259361997226075e-06, "loss": 0.0023, "step": 11173 }, { "epoch": 7.740907516453065, "grad_norm": 0.3306540846824646, "learning_rate": 2.2586685159500697e-06, "loss": 0.0027, "step": 11174 }, { "epoch": 7.7416002771042605, "grad_norm": 0.22028273344039917, "learning_rate": 2.257975034674064e-06, "loss": 0.0024, "step": 11175 }, { "epoch": 7.742293037755456, "grad_norm": 0.12116731703281403, "learning_rate": 2.2572815533980583e-06, "loss": 0.0017, "step": 11176 }, { "epoch": 7.74298579840665, "grad_norm": 0.09300985932350159, "learning_rate": 2.256588072122053e-06, "loss": 0.0017, "step": 11177 }, { "epoch": 7.743678559057845, "grad_norm": 0.06648532301187515, "learning_rate": 2.2558945908460474e-06, "loss": 0.0013, "step": 11178 }, { "epoch": 7.744371319709041, "grad_norm": 0.10641910880804062, "learning_rate": 2.255201109570042e-06, "loss": 0.0017, "step": 11179 }, { "epoch": 7.745064080360235, "grad_norm": 0.2373981475830078, "learning_rate": 2.2545076282940364e-06, "loss": 0.0022, "step": 11180 }, { "epoch": 7.74575684101143, "grad_norm": 0.12387380748987198, "learning_rate": 2.253814147018031e-06, "loss": 0.0018, "step": 11181 }, { "epoch": 7.746449601662626, "grad_norm": 0.06476127356290817, "learning_rate": 2.253120665742025e-06, "loss": 0.0011, "step": 11182 }, { "epoch": 7.747142362313821, "grad_norm": 0.1915445178747177, "learning_rate": 2.2524271844660195e-06, "loss": 0.0025, "step": 11183 }, { "epoch": 7.747835122965015, "grad_norm": 0.09372086822986603, "learning_rate": 2.251733703190014e-06, "loss": 0.0017, "step": 11184 }, { "epoch": 7.748527883616211, "grad_norm": 0.1344568133354187, "learning_rate": 2.2510402219140085e-06, "loss": 0.0023, "step": 11185 }, { "epoch": 7.749220644267406, "grad_norm": 0.24896401166915894, "learning_rate": 2.250346740638003e-06, "loss": 0.0023, "step": 11186 }, { "epoch": 7.7499134049186, "grad_norm": 0.1297737956047058, "learning_rate": 2.2496532593619975e-06, "loss": 0.0019, "step": 11187 }, { "epoch": 7.7506061655697955, "grad_norm": 0.1254224330186844, "learning_rate": 2.2489597780859916e-06, "loss": 0.0017, "step": 11188 }, { "epoch": 7.751298926220991, "grad_norm": 0.07593856751918793, "learning_rate": 2.2482662968099865e-06, "loss": 0.0013, "step": 11189 }, { "epoch": 7.751991686872186, "grad_norm": 0.07970157265663147, "learning_rate": 2.2475728155339806e-06, "loss": 0.0015, "step": 11190 }, { "epoch": 7.7526844475233805, "grad_norm": 0.1514560431241989, "learning_rate": 2.246879334257975e-06, "loss": 0.0024, "step": 11191 }, { "epoch": 7.753377208174576, "grad_norm": 0.10897752642631531, "learning_rate": 2.2461858529819696e-06, "loss": 0.0014, "step": 11192 }, { "epoch": 7.754069968825771, "grad_norm": 0.11987505853176117, "learning_rate": 2.245492371705964e-06, "loss": 0.0018, "step": 11193 }, { "epoch": 7.754762729476965, "grad_norm": 0.07282281666994095, "learning_rate": 2.2447988904299587e-06, "loss": 0.0014, "step": 11194 }, { "epoch": 7.755455490128161, "grad_norm": 0.08273394405841827, "learning_rate": 2.244105409153953e-06, "loss": 0.0014, "step": 11195 }, { "epoch": 7.756148250779356, "grad_norm": 0.3216395080089569, "learning_rate": 2.2434119278779477e-06, "loss": 0.0023, "step": 11196 }, { "epoch": 7.75684101143055, "grad_norm": 0.16991889476776123, "learning_rate": 2.2427184466019418e-06, "loss": 0.0017, "step": 11197 }, { "epoch": 7.757533772081746, "grad_norm": 0.14216946065425873, "learning_rate": 2.2420249653259363e-06, "loss": 0.0018, "step": 11198 }, { "epoch": 7.758226532732941, "grad_norm": 0.22020933032035828, "learning_rate": 2.241331484049931e-06, "loss": 0.0019, "step": 11199 }, { "epoch": 7.758919293384135, "grad_norm": 0.13811734318733215, "learning_rate": 2.2406380027739253e-06, "loss": 0.0018, "step": 11200 }, { "epoch": 7.759612054035331, "grad_norm": 0.3963909447193146, "learning_rate": 2.23994452149792e-06, "loss": 0.0038, "step": 11201 }, { "epoch": 7.760304814686526, "grad_norm": 0.09692416340112686, "learning_rate": 2.2392510402219143e-06, "loss": 0.0018, "step": 11202 }, { "epoch": 7.760997575337721, "grad_norm": 0.065221406519413, "learning_rate": 2.2385575589459084e-06, "loss": 0.0015, "step": 11203 }, { "epoch": 7.761690335988916, "grad_norm": 0.06640864908695221, "learning_rate": 2.2378640776699033e-06, "loss": 0.0013, "step": 11204 }, { "epoch": 7.762383096640111, "grad_norm": 0.06931870430707932, "learning_rate": 2.2371705963938974e-06, "loss": 0.0014, "step": 11205 }, { "epoch": 7.763075857291306, "grad_norm": 0.11891216039657593, "learning_rate": 2.236477115117892e-06, "loss": 0.0016, "step": 11206 }, { "epoch": 7.7637686179425005, "grad_norm": 0.11357012391090393, "learning_rate": 2.2357836338418865e-06, "loss": 0.0016, "step": 11207 }, { "epoch": 7.764461378593696, "grad_norm": 0.1463513821363449, "learning_rate": 2.235090152565881e-06, "loss": 0.0021, "step": 11208 }, { "epoch": 7.765154139244891, "grad_norm": 0.09959112852811813, "learning_rate": 2.2343966712898755e-06, "loss": 0.0015, "step": 11209 }, { "epoch": 7.765846899896086, "grad_norm": 0.07624318450689316, "learning_rate": 2.2337031900138696e-06, "loss": 0.0015, "step": 11210 }, { "epoch": 7.766539660547281, "grad_norm": 0.08544086664915085, "learning_rate": 2.2330097087378645e-06, "loss": 0.0014, "step": 11211 }, { "epoch": 7.767232421198476, "grad_norm": 0.2006032019853592, "learning_rate": 2.2323162274618586e-06, "loss": 0.0039, "step": 11212 }, { "epoch": 7.767925181849671, "grad_norm": 0.28245675563812256, "learning_rate": 2.231622746185853e-06, "loss": 0.0021, "step": 11213 }, { "epoch": 7.768617942500866, "grad_norm": 0.10247663408517838, "learning_rate": 2.2309292649098476e-06, "loss": 0.0016, "step": 11214 }, { "epoch": 7.769310703152061, "grad_norm": 0.06042948365211487, "learning_rate": 2.230235783633842e-06, "loss": 0.0015, "step": 11215 }, { "epoch": 7.770003463803256, "grad_norm": 0.803938090801239, "learning_rate": 2.2295423023578366e-06, "loss": 0.0037, "step": 11216 }, { "epoch": 7.770696224454451, "grad_norm": 0.13024838268756866, "learning_rate": 2.228848821081831e-06, "loss": 0.0016, "step": 11217 }, { "epoch": 7.771388985105646, "grad_norm": 0.10446521639823914, "learning_rate": 2.2281553398058252e-06, "loss": 0.0017, "step": 11218 }, { "epoch": 7.772081745756841, "grad_norm": 0.09668648988008499, "learning_rate": 2.22746185852982e-06, "loss": 0.0016, "step": 11219 }, { "epoch": 7.772774506408036, "grad_norm": 0.22087211906909943, "learning_rate": 2.2267683772538142e-06, "loss": 0.0015, "step": 11220 }, { "epoch": 7.773467267059231, "grad_norm": 0.36785584688186646, "learning_rate": 2.2260748959778087e-06, "loss": 0.0034, "step": 11221 }, { "epoch": 7.774160027710426, "grad_norm": 0.10302146524190903, "learning_rate": 2.2253814147018033e-06, "loss": 0.0019, "step": 11222 }, { "epoch": 7.774852788361621, "grad_norm": 0.0880153700709343, "learning_rate": 2.2246879334257978e-06, "loss": 0.0016, "step": 11223 }, { "epoch": 7.775545549012816, "grad_norm": 0.10583964735269547, "learning_rate": 2.2239944521497923e-06, "loss": 0.0015, "step": 11224 }, { "epoch": 7.776238309664011, "grad_norm": 0.23762349784374237, "learning_rate": 2.2233009708737864e-06, "loss": 0.0019, "step": 11225 }, { "epoch": 7.776931070315206, "grad_norm": 0.12018120288848877, "learning_rate": 2.2226074895977813e-06, "loss": 0.0014, "step": 11226 }, { "epoch": 7.777623830966401, "grad_norm": 0.15623846650123596, "learning_rate": 2.2219140083217754e-06, "loss": 0.0019, "step": 11227 }, { "epoch": 7.778316591617596, "grad_norm": 0.1327710598707199, "learning_rate": 2.22122052704577e-06, "loss": 0.0018, "step": 11228 }, { "epoch": 7.779009352268791, "grad_norm": 0.08099834620952606, "learning_rate": 2.2205270457697644e-06, "loss": 0.0013, "step": 11229 }, { "epoch": 7.779702112919987, "grad_norm": 0.1376233547925949, "learning_rate": 2.219833564493759e-06, "loss": 0.0019, "step": 11230 }, { "epoch": 7.780394873571181, "grad_norm": 0.1590290665626526, "learning_rate": 2.2191400832177534e-06, "loss": 0.0017, "step": 11231 }, { "epoch": 7.781087634222376, "grad_norm": 0.14185279607772827, "learning_rate": 2.218446601941748e-06, "loss": 0.0016, "step": 11232 }, { "epoch": 7.7817803948735715, "grad_norm": 0.08854740113019943, "learning_rate": 2.217753120665742e-06, "loss": 0.0015, "step": 11233 }, { "epoch": 7.782473155524766, "grad_norm": 0.21793046593666077, "learning_rate": 2.2170596393897365e-06, "loss": 0.002, "step": 11234 }, { "epoch": 7.783165916175961, "grad_norm": 0.14304323494434357, "learning_rate": 2.216366158113731e-06, "loss": 0.0019, "step": 11235 }, { "epoch": 7.7838586768271565, "grad_norm": 0.198887899518013, "learning_rate": 2.2156726768377255e-06, "loss": 0.0029, "step": 11236 }, { "epoch": 7.784551437478351, "grad_norm": 0.1203419491648674, "learning_rate": 2.21497919556172e-06, "loss": 0.0019, "step": 11237 }, { "epoch": 7.785244198129546, "grad_norm": 0.08119252324104309, "learning_rate": 2.2142857142857146e-06, "loss": 0.0014, "step": 11238 }, { "epoch": 7.785936958780741, "grad_norm": 0.12953047454357147, "learning_rate": 2.213592233009709e-06, "loss": 0.0017, "step": 11239 }, { "epoch": 7.786629719431936, "grad_norm": 0.14757809042930603, "learning_rate": 2.212898751733703e-06, "loss": 0.0025, "step": 11240 }, { "epoch": 7.787322480083131, "grad_norm": 0.18309348821640015, "learning_rate": 2.212205270457698e-06, "loss": 0.0017, "step": 11241 }, { "epoch": 7.788015240734326, "grad_norm": 0.09014245867729187, "learning_rate": 2.211511789181692e-06, "loss": 0.0017, "step": 11242 }, { "epoch": 7.788708001385522, "grad_norm": 0.06307058036327362, "learning_rate": 2.2108183079056867e-06, "loss": 0.0014, "step": 11243 }, { "epoch": 7.789400762036716, "grad_norm": 0.07024972885847092, "learning_rate": 2.210124826629681e-06, "loss": 0.0015, "step": 11244 }, { "epoch": 7.790093522687911, "grad_norm": 0.1116972491145134, "learning_rate": 2.2094313453536757e-06, "loss": 0.0019, "step": 11245 }, { "epoch": 7.790786283339107, "grad_norm": 0.1071903258562088, "learning_rate": 2.2087378640776702e-06, "loss": 0.002, "step": 11246 }, { "epoch": 7.791479043990301, "grad_norm": 0.12507693469524384, "learning_rate": 2.2080443828016647e-06, "loss": 0.0017, "step": 11247 }, { "epoch": 7.792171804641496, "grad_norm": 0.10897404700517654, "learning_rate": 2.207350901525659e-06, "loss": 0.002, "step": 11248 }, { "epoch": 7.792864565292692, "grad_norm": 0.1244838535785675, "learning_rate": 2.2066574202496533e-06, "loss": 0.0016, "step": 11249 }, { "epoch": 7.793557325943887, "grad_norm": 0.11542940884828568, "learning_rate": 2.205963938973648e-06, "loss": 0.0023, "step": 11250 }, { "epoch": 7.794250086595081, "grad_norm": 0.12995007634162903, "learning_rate": 2.2052704576976423e-06, "loss": 0.0018, "step": 11251 }, { "epoch": 7.7949428472462765, "grad_norm": 0.34024178981781006, "learning_rate": 2.204576976421637e-06, "loss": 0.002, "step": 11252 }, { "epoch": 7.795635607897472, "grad_norm": 0.184537872672081, "learning_rate": 2.2038834951456314e-06, "loss": 0.0025, "step": 11253 }, { "epoch": 7.796328368548666, "grad_norm": 0.3570444583892822, "learning_rate": 2.203190013869626e-06, "loss": 0.0032, "step": 11254 }, { "epoch": 7.7970211291998615, "grad_norm": 0.2577095925807953, "learning_rate": 2.20249653259362e-06, "loss": 0.0046, "step": 11255 }, { "epoch": 7.797713889851057, "grad_norm": 0.22864298522472382, "learning_rate": 2.201803051317615e-06, "loss": 0.0021, "step": 11256 }, { "epoch": 7.798406650502251, "grad_norm": 0.17480036616325378, "learning_rate": 2.201109570041609e-06, "loss": 0.0022, "step": 11257 }, { "epoch": 7.799099411153446, "grad_norm": 0.08790505677461624, "learning_rate": 2.2004160887656035e-06, "loss": 0.0016, "step": 11258 }, { "epoch": 7.799792171804642, "grad_norm": 0.34003666043281555, "learning_rate": 2.199722607489598e-06, "loss": 0.004, "step": 11259 }, { "epoch": 7.800484932455836, "grad_norm": 0.05693638697266579, "learning_rate": 2.1990291262135925e-06, "loss": 0.0014, "step": 11260 }, { "epoch": 7.801177693107031, "grad_norm": 0.19102239608764648, "learning_rate": 2.198335644937587e-06, "loss": 0.0021, "step": 11261 }, { "epoch": 7.801870453758227, "grad_norm": 0.3277992010116577, "learning_rate": 2.1976421636615815e-06, "loss": 0.0022, "step": 11262 }, { "epoch": 7.802563214409422, "grad_norm": 0.10052553564310074, "learning_rate": 2.1969486823855756e-06, "loss": 0.0017, "step": 11263 }, { "epoch": 7.803255975060616, "grad_norm": 0.09415896981954575, "learning_rate": 2.19625520110957e-06, "loss": 0.0017, "step": 11264 }, { "epoch": 7.803948735711812, "grad_norm": 0.28450697660446167, "learning_rate": 2.1955617198335646e-06, "loss": 0.0029, "step": 11265 }, { "epoch": 7.804641496363007, "grad_norm": 0.10606548190116882, "learning_rate": 2.194868238557559e-06, "loss": 0.0015, "step": 11266 }, { "epoch": 7.805334257014201, "grad_norm": 0.10414156317710876, "learning_rate": 2.1941747572815537e-06, "loss": 0.0027, "step": 11267 }, { "epoch": 7.8060270176653965, "grad_norm": 0.17339912056922913, "learning_rate": 2.1934812760055477e-06, "loss": 0.0026, "step": 11268 }, { "epoch": 7.806719778316592, "grad_norm": 0.07309307903051376, "learning_rate": 2.1927877947295427e-06, "loss": 0.0013, "step": 11269 }, { "epoch": 7.807412538967787, "grad_norm": 0.17403848469257355, "learning_rate": 2.1920943134535368e-06, "loss": 0.0024, "step": 11270 }, { "epoch": 7.8081052996189815, "grad_norm": 0.10918648540973663, "learning_rate": 2.1914008321775317e-06, "loss": 0.0019, "step": 11271 }, { "epoch": 7.808798060270177, "grad_norm": 0.10794667154550552, "learning_rate": 2.1907073509015258e-06, "loss": 0.0016, "step": 11272 }, { "epoch": 7.809490820921372, "grad_norm": 0.11508607864379883, "learning_rate": 2.1900138696255203e-06, "loss": 0.0018, "step": 11273 }, { "epoch": 7.810183581572566, "grad_norm": 0.16168354451656342, "learning_rate": 2.189320388349515e-06, "loss": 0.0018, "step": 11274 }, { "epoch": 7.810876342223762, "grad_norm": 0.2915906310081482, "learning_rate": 2.1886269070735093e-06, "loss": 0.0022, "step": 11275 }, { "epoch": 7.811569102874957, "grad_norm": 0.0944768488407135, "learning_rate": 2.187933425797504e-06, "loss": 0.0014, "step": 11276 }, { "epoch": 7.812261863526151, "grad_norm": 0.06748131662607193, "learning_rate": 2.1872399445214983e-06, "loss": 0.0013, "step": 11277 }, { "epoch": 7.812954624177347, "grad_norm": 0.12040342390537262, "learning_rate": 2.1865464632454924e-06, "loss": 0.0014, "step": 11278 }, { "epoch": 7.813647384828542, "grad_norm": 0.181565523147583, "learning_rate": 2.185852981969487e-06, "loss": 0.0029, "step": 11279 }, { "epoch": 7.814340145479736, "grad_norm": 0.05621764063835144, "learning_rate": 2.1851595006934814e-06, "loss": 0.0012, "step": 11280 }, { "epoch": 7.815032906130932, "grad_norm": 0.15113765001296997, "learning_rate": 2.184466019417476e-06, "loss": 0.0018, "step": 11281 }, { "epoch": 7.815725666782127, "grad_norm": 0.06723956763744354, "learning_rate": 2.1837725381414705e-06, "loss": 0.0013, "step": 11282 }, { "epoch": 7.816418427433322, "grad_norm": 0.3941515386104584, "learning_rate": 2.1830790568654645e-06, "loss": 0.0024, "step": 11283 }, { "epoch": 7.8171111880845165, "grad_norm": 0.0861259177327156, "learning_rate": 2.1823855755894595e-06, "loss": 0.0017, "step": 11284 }, { "epoch": 7.817803948735712, "grad_norm": 0.11990301311016083, "learning_rate": 2.1816920943134536e-06, "loss": 0.0016, "step": 11285 }, { "epoch": 7.818496709386907, "grad_norm": 0.10869354009628296, "learning_rate": 2.180998613037448e-06, "loss": 0.0015, "step": 11286 }, { "epoch": 7.8191894700381015, "grad_norm": 0.13147354125976562, "learning_rate": 2.1803051317614426e-06, "loss": 0.0023, "step": 11287 }, { "epoch": 7.819882230689297, "grad_norm": 0.10592517256736755, "learning_rate": 2.179611650485437e-06, "loss": 0.0016, "step": 11288 }, { "epoch": 7.820574991340492, "grad_norm": 0.695120632648468, "learning_rate": 2.1789181692094316e-06, "loss": 0.002, "step": 11289 }, { "epoch": 7.821267751991687, "grad_norm": 0.17284594476222992, "learning_rate": 2.178224687933426e-06, "loss": 0.0019, "step": 11290 }, { "epoch": 7.821960512642882, "grad_norm": 0.19730062782764435, "learning_rate": 2.17753120665742e-06, "loss": 0.003, "step": 11291 }, { "epoch": 7.822653273294077, "grad_norm": 0.12815114855766296, "learning_rate": 2.1768377253814147e-06, "loss": 0.002, "step": 11292 }, { "epoch": 7.823346033945272, "grad_norm": 0.07243195921182632, "learning_rate": 2.1761442441054092e-06, "loss": 0.0015, "step": 11293 }, { "epoch": 7.824038794596467, "grad_norm": 0.12785346806049347, "learning_rate": 2.1754507628294037e-06, "loss": 0.0017, "step": 11294 }, { "epoch": 7.824731555247662, "grad_norm": 0.08393670618534088, "learning_rate": 2.1747572815533982e-06, "loss": 0.0017, "step": 11295 }, { "epoch": 7.825424315898857, "grad_norm": 0.3895988464355469, "learning_rate": 2.1740638002773927e-06, "loss": 0.0024, "step": 11296 }, { "epoch": 7.826117076550052, "grad_norm": 0.20433297753334045, "learning_rate": 2.1733703190013873e-06, "loss": 0.0014, "step": 11297 }, { "epoch": 7.826809837201247, "grad_norm": 0.09145063906908035, "learning_rate": 2.1726768377253813e-06, "loss": 0.0017, "step": 11298 }, { "epoch": 7.827502597852442, "grad_norm": 0.10940881818532944, "learning_rate": 2.1719833564493763e-06, "loss": 0.0015, "step": 11299 }, { "epoch": 7.828195358503637, "grad_norm": 0.17061269283294678, "learning_rate": 2.1712898751733704e-06, "loss": 0.0019, "step": 11300 }, { "epoch": 7.828888119154832, "grad_norm": 0.1235680803656578, "learning_rate": 2.170596393897365e-06, "loss": 0.0016, "step": 11301 }, { "epoch": 7.829580879806027, "grad_norm": 0.06370677053928375, "learning_rate": 2.1699029126213594e-06, "loss": 0.0012, "step": 11302 }, { "epoch": 7.830273640457222, "grad_norm": 0.22167515754699707, "learning_rate": 2.169209431345354e-06, "loss": 0.0023, "step": 11303 }, { "epoch": 7.830966401108417, "grad_norm": 0.10389106720685959, "learning_rate": 2.1685159500693484e-06, "loss": 0.0017, "step": 11304 }, { "epoch": 7.831659161759612, "grad_norm": 0.1505199521780014, "learning_rate": 2.167822468793343e-06, "loss": 0.0031, "step": 11305 }, { "epoch": 7.832351922410807, "grad_norm": 0.1148136705160141, "learning_rate": 2.167128987517337e-06, "loss": 0.0018, "step": 11306 }, { "epoch": 7.833044683062002, "grad_norm": 0.057822197675704956, "learning_rate": 2.1664355062413315e-06, "loss": 0.0011, "step": 11307 }, { "epoch": 7.833737443713197, "grad_norm": 0.09731818735599518, "learning_rate": 2.165742024965326e-06, "loss": 0.0015, "step": 11308 }, { "epoch": 7.834430204364392, "grad_norm": 0.19469495117664337, "learning_rate": 2.1650485436893205e-06, "loss": 0.0025, "step": 11309 }, { "epoch": 7.835122965015588, "grad_norm": 0.12179212272167206, "learning_rate": 2.164355062413315e-06, "loss": 0.0014, "step": 11310 }, { "epoch": 7.835815725666782, "grad_norm": 0.28897035121917725, "learning_rate": 2.1636615811373096e-06, "loss": 0.0027, "step": 11311 }, { "epoch": 7.836508486317977, "grad_norm": 0.24937516450881958, "learning_rate": 2.162968099861304e-06, "loss": 0.0019, "step": 11312 }, { "epoch": 7.8372012469691725, "grad_norm": 0.08942914009094238, "learning_rate": 2.162274618585298e-06, "loss": 0.0015, "step": 11313 }, { "epoch": 7.837894007620367, "grad_norm": 0.06706539541482925, "learning_rate": 2.161581137309293e-06, "loss": 0.0014, "step": 11314 }, { "epoch": 7.838586768271562, "grad_norm": 0.19965887069702148, "learning_rate": 2.160887656033287e-06, "loss": 0.002, "step": 11315 }, { "epoch": 7.8392795289227575, "grad_norm": 0.1354026347398758, "learning_rate": 2.1601941747572817e-06, "loss": 0.0024, "step": 11316 }, { "epoch": 7.839972289573952, "grad_norm": 0.15121757984161377, "learning_rate": 2.159500693481276e-06, "loss": 0.0019, "step": 11317 }, { "epoch": 7.840665050225147, "grad_norm": 0.14452719688415527, "learning_rate": 2.1588072122052707e-06, "loss": 0.0021, "step": 11318 }, { "epoch": 7.841357810876342, "grad_norm": 0.10823360830545425, "learning_rate": 2.158113730929265e-06, "loss": 0.0016, "step": 11319 }, { "epoch": 7.842050571527537, "grad_norm": 0.06858740746974945, "learning_rate": 2.1574202496532597e-06, "loss": 0.0015, "step": 11320 }, { "epoch": 7.842743332178732, "grad_norm": 0.0829811617732048, "learning_rate": 2.156726768377254e-06, "loss": 0.0015, "step": 11321 }, { "epoch": 7.843436092829927, "grad_norm": 0.09882291406393051, "learning_rate": 2.1560332871012483e-06, "loss": 0.0013, "step": 11322 }, { "epoch": 7.844128853481123, "grad_norm": 0.34364140033721924, "learning_rate": 2.155339805825243e-06, "loss": 0.0026, "step": 11323 }, { "epoch": 7.844821614132317, "grad_norm": 0.10191264748573303, "learning_rate": 2.1546463245492373e-06, "loss": 0.0017, "step": 11324 }, { "epoch": 7.845514374783512, "grad_norm": 0.08529110252857208, "learning_rate": 2.153952843273232e-06, "loss": 0.0013, "step": 11325 }, { "epoch": 7.846207135434708, "grad_norm": 0.12132897973060608, "learning_rate": 2.153259361997226e-06, "loss": 0.0014, "step": 11326 }, { "epoch": 7.846899896085902, "grad_norm": 0.13429586589336395, "learning_rate": 2.152565880721221e-06, "loss": 0.0015, "step": 11327 }, { "epoch": 7.847592656737097, "grad_norm": 0.4025964140892029, "learning_rate": 2.151872399445215e-06, "loss": 0.0028, "step": 11328 }, { "epoch": 7.8482854173882926, "grad_norm": 0.14849776029586792, "learning_rate": 2.15117891816921e-06, "loss": 0.0017, "step": 11329 }, { "epoch": 7.848978178039488, "grad_norm": 0.09574708342552185, "learning_rate": 2.150485436893204e-06, "loss": 0.0016, "step": 11330 }, { "epoch": 7.849670938690682, "grad_norm": 0.1349084973335266, "learning_rate": 2.1497919556171985e-06, "loss": 0.0015, "step": 11331 }, { "epoch": 7.8503636993418775, "grad_norm": 0.0910324901342392, "learning_rate": 2.149098474341193e-06, "loss": 0.0014, "step": 11332 }, { "epoch": 7.851056459993073, "grad_norm": 0.08863872289657593, "learning_rate": 2.1484049930651875e-06, "loss": 0.0015, "step": 11333 }, { "epoch": 7.851749220644267, "grad_norm": 0.05655772611498833, "learning_rate": 2.147711511789182e-06, "loss": 0.0013, "step": 11334 }, { "epoch": 7.8524419812954624, "grad_norm": 0.14138595759868622, "learning_rate": 2.1470180305131765e-06, "loss": 0.0016, "step": 11335 }, { "epoch": 7.853134741946658, "grad_norm": 0.062396854162216187, "learning_rate": 2.1463245492371706e-06, "loss": 0.0013, "step": 11336 }, { "epoch": 7.853827502597852, "grad_norm": 0.08552103489637375, "learning_rate": 2.145631067961165e-06, "loss": 0.0017, "step": 11337 }, { "epoch": 7.854520263249047, "grad_norm": 0.2230331301689148, "learning_rate": 2.1449375866851596e-06, "loss": 0.0024, "step": 11338 }, { "epoch": 7.855213023900243, "grad_norm": 0.23418837785720825, "learning_rate": 2.144244105409154e-06, "loss": 0.0018, "step": 11339 }, { "epoch": 7.855905784551437, "grad_norm": 0.4438765048980713, "learning_rate": 2.1435506241331486e-06, "loss": 0.0045, "step": 11340 }, { "epoch": 7.856598545202632, "grad_norm": 0.549567461013794, "learning_rate": 2.1428571428571427e-06, "loss": 0.0041, "step": 11341 }, { "epoch": 7.857291305853828, "grad_norm": 0.09813400357961655, "learning_rate": 2.1421636615811377e-06, "loss": 0.0014, "step": 11342 }, { "epoch": 7.857984066505023, "grad_norm": 0.0821555033326149, "learning_rate": 2.1414701803051317e-06, "loss": 0.0016, "step": 11343 }, { "epoch": 7.858676827156217, "grad_norm": 0.1884317547082901, "learning_rate": 2.1407766990291267e-06, "loss": 0.0017, "step": 11344 }, { "epoch": 7.859369587807413, "grad_norm": 0.17601996660232544, "learning_rate": 2.1400832177531208e-06, "loss": 0.0016, "step": 11345 }, { "epoch": 7.860062348458608, "grad_norm": 0.14312589168548584, "learning_rate": 2.1393897364771153e-06, "loss": 0.0024, "step": 11346 }, { "epoch": 7.860755109109802, "grad_norm": 0.1217978298664093, "learning_rate": 2.13869625520111e-06, "loss": 0.0018, "step": 11347 }, { "epoch": 7.8614478697609975, "grad_norm": 0.4607955515384674, "learning_rate": 2.1380027739251043e-06, "loss": 0.0019, "step": 11348 }, { "epoch": 7.862140630412193, "grad_norm": 0.24453143775463104, "learning_rate": 2.137309292649099e-06, "loss": 0.002, "step": 11349 }, { "epoch": 7.862833391063388, "grad_norm": 0.14901407063007355, "learning_rate": 2.136615811373093e-06, "loss": 0.002, "step": 11350 }, { "epoch": 7.8635261517145825, "grad_norm": 0.26923948526382446, "learning_rate": 2.1359223300970874e-06, "loss": 0.0021, "step": 11351 }, { "epoch": 7.864218912365778, "grad_norm": 0.10520078986883163, "learning_rate": 2.135228848821082e-06, "loss": 0.0017, "step": 11352 }, { "epoch": 7.864911673016973, "grad_norm": 0.10828854888677597, "learning_rate": 2.1345353675450764e-06, "loss": 0.0016, "step": 11353 }, { "epoch": 7.865604433668167, "grad_norm": 0.1414390653371811, "learning_rate": 2.133841886269071e-06, "loss": 0.0027, "step": 11354 }, { "epoch": 7.866297194319363, "grad_norm": 0.2203105390071869, "learning_rate": 2.1331484049930654e-06, "loss": 0.0022, "step": 11355 }, { "epoch": 7.866989954970558, "grad_norm": 0.36115825176239014, "learning_rate": 2.1324549237170595e-06, "loss": 0.0035, "step": 11356 }, { "epoch": 7.867682715621752, "grad_norm": 0.6102029085159302, "learning_rate": 2.1317614424410545e-06, "loss": 0.0026, "step": 11357 }, { "epoch": 7.868375476272948, "grad_norm": 0.14479592442512512, "learning_rate": 2.1310679611650486e-06, "loss": 0.0017, "step": 11358 }, { "epoch": 7.869068236924143, "grad_norm": 0.4564879834651947, "learning_rate": 2.1303744798890435e-06, "loss": 0.0022, "step": 11359 }, { "epoch": 7.869760997575337, "grad_norm": 0.10407575964927673, "learning_rate": 2.1296809986130376e-06, "loss": 0.0016, "step": 11360 }, { "epoch": 7.870453758226533, "grad_norm": 0.2893345057964325, "learning_rate": 2.128987517337032e-06, "loss": 0.0029, "step": 11361 }, { "epoch": 7.871146518877728, "grad_norm": 0.10947423428297043, "learning_rate": 2.1282940360610266e-06, "loss": 0.0023, "step": 11362 }, { "epoch": 7.871839279528922, "grad_norm": 0.06859258562326431, "learning_rate": 2.127600554785021e-06, "loss": 0.0012, "step": 11363 }, { "epoch": 7.8725320401801175, "grad_norm": 0.1107030063867569, "learning_rate": 2.1269070735090156e-06, "loss": 0.0016, "step": 11364 }, { "epoch": 7.873224800831313, "grad_norm": 0.16727851331233978, "learning_rate": 2.1262135922330097e-06, "loss": 0.0016, "step": 11365 }, { "epoch": 7.873917561482508, "grad_norm": 0.112326979637146, "learning_rate": 2.125520110957004e-06, "loss": 0.0021, "step": 11366 }, { "epoch": 7.8746103221337025, "grad_norm": 0.13387130200862885, "learning_rate": 2.1248266296809987e-06, "loss": 0.002, "step": 11367 }, { "epoch": 7.875303082784898, "grad_norm": 0.2403022199869156, "learning_rate": 2.1241331484049932e-06, "loss": 0.0028, "step": 11368 }, { "epoch": 7.875995843436093, "grad_norm": 0.10315826535224915, "learning_rate": 2.1234396671289877e-06, "loss": 0.002, "step": 11369 }, { "epoch": 7.876688604087288, "grad_norm": 0.2243594378232956, "learning_rate": 2.1227461858529822e-06, "loss": 0.002, "step": 11370 }, { "epoch": 7.877381364738483, "grad_norm": 0.08643249422311783, "learning_rate": 2.1220527045769763e-06, "loss": 0.0015, "step": 11371 }, { "epoch": 7.878074125389678, "grad_norm": 0.42081764340400696, "learning_rate": 2.1213592233009713e-06, "loss": 0.0049, "step": 11372 }, { "epoch": 7.878766886040873, "grad_norm": 0.14030934870243073, "learning_rate": 2.1206657420249654e-06, "loss": 0.0021, "step": 11373 }, { "epoch": 7.879459646692068, "grad_norm": 0.08104424178600311, "learning_rate": 2.11997226074896e-06, "loss": 0.0015, "step": 11374 }, { "epoch": 7.880152407343263, "grad_norm": 0.09887759387493134, "learning_rate": 2.1192787794729544e-06, "loss": 0.0015, "step": 11375 }, { "epoch": 7.880845167994458, "grad_norm": 0.14208678901195526, "learning_rate": 2.118585298196949e-06, "loss": 0.0023, "step": 11376 }, { "epoch": 7.881537928645653, "grad_norm": 0.07758071273565292, "learning_rate": 2.1178918169209434e-06, "loss": 0.0014, "step": 11377 }, { "epoch": 7.882230689296848, "grad_norm": 0.21352963149547577, "learning_rate": 2.117198335644938e-06, "loss": 0.0021, "step": 11378 }, { "epoch": 7.882923449948043, "grad_norm": 0.2536875605583191, "learning_rate": 2.1165048543689324e-06, "loss": 0.0021, "step": 11379 }, { "epoch": 7.883616210599238, "grad_norm": 0.1069861650466919, "learning_rate": 2.1158113730929265e-06, "loss": 0.0015, "step": 11380 }, { "epoch": 7.884308971250433, "grad_norm": 0.2671584486961365, "learning_rate": 2.115117891816921e-06, "loss": 0.0025, "step": 11381 }, { "epoch": 7.885001731901628, "grad_norm": 0.10854753106832504, "learning_rate": 2.1144244105409155e-06, "loss": 0.0017, "step": 11382 }, { "epoch": 7.8856944925528225, "grad_norm": 0.13896214962005615, "learning_rate": 2.11373092926491e-06, "loss": 0.0021, "step": 11383 }, { "epoch": 7.886387253204018, "grad_norm": 0.10801216959953308, "learning_rate": 2.1130374479889045e-06, "loss": 0.0018, "step": 11384 }, { "epoch": 7.887080013855213, "grad_norm": 0.1705852597951889, "learning_rate": 2.112343966712899e-06, "loss": 0.002, "step": 11385 }, { "epoch": 7.887772774506408, "grad_norm": 0.18517433106899261, "learning_rate": 2.111650485436893e-06, "loss": 0.0024, "step": 11386 }, { "epoch": 7.888465535157603, "grad_norm": 0.08442562073469162, "learning_rate": 2.110957004160888e-06, "loss": 0.0018, "step": 11387 }, { "epoch": 7.889158295808798, "grad_norm": 0.10756903886795044, "learning_rate": 2.110263522884882e-06, "loss": 0.0019, "step": 11388 }, { "epoch": 7.889851056459993, "grad_norm": 0.13703681528568268, "learning_rate": 2.1095700416088767e-06, "loss": 0.0016, "step": 11389 }, { "epoch": 7.890543817111189, "grad_norm": 0.13048480451107025, "learning_rate": 2.108876560332871e-06, "loss": 0.0014, "step": 11390 }, { "epoch": 7.891236577762383, "grad_norm": 0.20304864645004272, "learning_rate": 2.1081830790568657e-06, "loss": 0.0018, "step": 11391 }, { "epoch": 7.891929338413578, "grad_norm": 0.056315258145332336, "learning_rate": 2.10748959778086e-06, "loss": 0.0012, "step": 11392 }, { "epoch": 7.8926220990647735, "grad_norm": 0.13983847200870514, "learning_rate": 2.1067961165048547e-06, "loss": 0.0018, "step": 11393 }, { "epoch": 7.893314859715968, "grad_norm": 0.10653576254844666, "learning_rate": 2.1061026352288492e-06, "loss": 0.0016, "step": 11394 }, { "epoch": 7.894007620367163, "grad_norm": 0.15684229135513306, "learning_rate": 2.1054091539528433e-06, "loss": 0.0021, "step": 11395 }, { "epoch": 7.8947003810183585, "grad_norm": 0.1900988519191742, "learning_rate": 2.104715672676838e-06, "loss": 0.0019, "step": 11396 }, { "epoch": 7.895393141669553, "grad_norm": 0.23246794939041138, "learning_rate": 2.1040221914008323e-06, "loss": 0.0021, "step": 11397 }, { "epoch": 7.896085902320748, "grad_norm": 0.13619092106819153, "learning_rate": 2.103328710124827e-06, "loss": 0.0016, "step": 11398 }, { "epoch": 7.896778662971943, "grad_norm": 0.4182014465332031, "learning_rate": 2.1026352288488213e-06, "loss": 0.0023, "step": 11399 }, { "epoch": 7.897471423623138, "grad_norm": 0.1065654307603836, "learning_rate": 2.101941747572816e-06, "loss": 0.0015, "step": 11400 }, { "epoch": 7.898164184274333, "grad_norm": 0.3538290858268738, "learning_rate": 2.10124826629681e-06, "loss": 0.0028, "step": 11401 }, { "epoch": 7.898856944925528, "grad_norm": 0.1610267460346222, "learning_rate": 2.100554785020805e-06, "loss": 0.0017, "step": 11402 }, { "epoch": 7.899549705576723, "grad_norm": 0.10012906044721603, "learning_rate": 2.099861303744799e-06, "loss": 0.0016, "step": 11403 }, { "epoch": 7.900242466227918, "grad_norm": 0.13199912011623383, "learning_rate": 2.0991678224687935e-06, "loss": 0.0017, "step": 11404 }, { "epoch": 7.900935226879113, "grad_norm": 0.11438145488500595, "learning_rate": 2.098474341192788e-06, "loss": 0.0015, "step": 11405 }, { "epoch": 7.901627987530309, "grad_norm": 0.12943808734416962, "learning_rate": 2.0977808599167825e-06, "loss": 0.002, "step": 11406 }, { "epoch": 7.902320748181503, "grad_norm": 0.24114668369293213, "learning_rate": 2.097087378640777e-06, "loss": 0.002, "step": 11407 }, { "epoch": 7.903013508832698, "grad_norm": 0.0662049949169159, "learning_rate": 2.096393897364771e-06, "loss": 0.0013, "step": 11408 }, { "epoch": 7.9037062694838935, "grad_norm": 0.10101411491632462, "learning_rate": 2.095700416088766e-06, "loss": 0.0015, "step": 11409 }, { "epoch": 7.904399030135089, "grad_norm": 0.1440206617116928, "learning_rate": 2.09500693481276e-06, "loss": 0.0018, "step": 11410 }, { "epoch": 7.905091790786283, "grad_norm": 0.193631112575531, "learning_rate": 2.0943134535367546e-06, "loss": 0.0023, "step": 11411 }, { "epoch": 7.9057845514374785, "grad_norm": 0.13785867393016815, "learning_rate": 2.093619972260749e-06, "loss": 0.0016, "step": 11412 }, { "epoch": 7.906477312088674, "grad_norm": 0.18846537172794342, "learning_rate": 2.0929264909847436e-06, "loss": 0.0015, "step": 11413 }, { "epoch": 7.907170072739868, "grad_norm": 0.38149115443229675, "learning_rate": 2.092233009708738e-06, "loss": 0.0027, "step": 11414 }, { "epoch": 7.907862833391063, "grad_norm": 0.2141820341348648, "learning_rate": 2.0915395284327327e-06, "loss": 0.0025, "step": 11415 }, { "epoch": 7.908555594042259, "grad_norm": 0.10129716992378235, "learning_rate": 2.0908460471567267e-06, "loss": 0.0016, "step": 11416 }, { "epoch": 7.909248354693453, "grad_norm": 0.08857739716768265, "learning_rate": 2.0901525658807217e-06, "loss": 0.0014, "step": 11417 }, { "epoch": 7.909941115344648, "grad_norm": 0.14479796588420868, "learning_rate": 2.0894590846047158e-06, "loss": 0.0022, "step": 11418 }, { "epoch": 7.910633875995844, "grad_norm": 0.19538909196853638, "learning_rate": 2.0887656033287103e-06, "loss": 0.0025, "step": 11419 }, { "epoch": 7.911326636647038, "grad_norm": 0.07969465106725693, "learning_rate": 2.0880721220527048e-06, "loss": 0.0015, "step": 11420 }, { "epoch": 7.912019397298233, "grad_norm": 0.09164320677518845, "learning_rate": 2.0873786407766993e-06, "loss": 0.0014, "step": 11421 }, { "epoch": 7.912712157949429, "grad_norm": 0.0549982450902462, "learning_rate": 2.086685159500694e-06, "loss": 0.0013, "step": 11422 }, { "epoch": 7.913404918600623, "grad_norm": 0.1624305099248886, "learning_rate": 2.085991678224688e-06, "loss": 0.0019, "step": 11423 }, { "epoch": 7.914097679251818, "grad_norm": 0.12367258220911026, "learning_rate": 2.085298196948683e-06, "loss": 0.0015, "step": 11424 }, { "epoch": 7.914790439903014, "grad_norm": 0.3276459872722626, "learning_rate": 2.084604715672677e-06, "loss": 0.0025, "step": 11425 }, { "epoch": 7.915483200554209, "grad_norm": 0.13010117411613464, "learning_rate": 2.0839112343966714e-06, "loss": 0.0021, "step": 11426 }, { "epoch": 7.916175961205403, "grad_norm": 0.08906543254852295, "learning_rate": 2.083217753120666e-06, "loss": 0.0017, "step": 11427 }, { "epoch": 7.9168687218565985, "grad_norm": 0.07558631896972656, "learning_rate": 2.0825242718446604e-06, "loss": 0.0014, "step": 11428 }, { "epoch": 7.917561482507794, "grad_norm": 0.12094772607088089, "learning_rate": 2.081830790568655e-06, "loss": 0.0021, "step": 11429 }, { "epoch": 7.918254243158988, "grad_norm": 0.22087305784225464, "learning_rate": 2.0811373092926495e-06, "loss": 0.0035, "step": 11430 }, { "epoch": 7.9189470038101835, "grad_norm": 0.1467932164669037, "learning_rate": 2.0804438280166435e-06, "loss": 0.0016, "step": 11431 }, { "epoch": 7.919639764461379, "grad_norm": 0.12079876661300659, "learning_rate": 2.079750346740638e-06, "loss": 0.0019, "step": 11432 }, { "epoch": 7.920332525112574, "grad_norm": 0.3745475709438324, "learning_rate": 2.0790568654646326e-06, "loss": 0.0023, "step": 11433 }, { "epoch": 7.921025285763768, "grad_norm": 0.11212224513292313, "learning_rate": 2.078363384188627e-06, "loss": 0.0019, "step": 11434 }, { "epoch": 7.921718046414964, "grad_norm": 0.13426125049591064, "learning_rate": 2.0776699029126216e-06, "loss": 0.0015, "step": 11435 }, { "epoch": 7.922410807066159, "grad_norm": 0.04780206456780434, "learning_rate": 2.076976421636616e-06, "loss": 0.0013, "step": 11436 }, { "epoch": 7.923103567717353, "grad_norm": 0.22956909239292145, "learning_rate": 2.0762829403606106e-06, "loss": 0.0027, "step": 11437 }, { "epoch": 7.923796328368549, "grad_norm": 0.08223120868206024, "learning_rate": 2.0755894590846047e-06, "loss": 0.0013, "step": 11438 }, { "epoch": 7.924489089019744, "grad_norm": 0.1168670505285263, "learning_rate": 2.0748959778085996e-06, "loss": 0.0014, "step": 11439 }, { "epoch": 7.925181849670938, "grad_norm": 0.11173796653747559, "learning_rate": 2.0742024965325937e-06, "loss": 0.0019, "step": 11440 }, { "epoch": 7.925874610322134, "grad_norm": 0.12356695532798767, "learning_rate": 2.0735090152565882e-06, "loss": 0.002, "step": 11441 }, { "epoch": 7.926567370973329, "grad_norm": 0.07252473384141922, "learning_rate": 2.0728155339805827e-06, "loss": 0.0015, "step": 11442 }, { "epoch": 7.927260131624523, "grad_norm": 0.22668412327766418, "learning_rate": 2.0721220527045772e-06, "loss": 0.0019, "step": 11443 }, { "epoch": 7.9279528922757185, "grad_norm": 0.09695761650800705, "learning_rate": 2.0714285714285717e-06, "loss": 0.0016, "step": 11444 }, { "epoch": 7.928645652926914, "grad_norm": 0.09948756545782089, "learning_rate": 2.0707350901525663e-06, "loss": 0.0016, "step": 11445 }, { "epoch": 7.929338413578109, "grad_norm": 0.22758884727954865, "learning_rate": 2.0700416088765603e-06, "loss": 0.0021, "step": 11446 }, { "epoch": 7.9300311742293035, "grad_norm": 0.0764935314655304, "learning_rate": 2.069348127600555e-06, "loss": 0.0015, "step": 11447 }, { "epoch": 7.930723934880499, "grad_norm": 0.20141349732875824, "learning_rate": 2.0686546463245494e-06, "loss": 0.0026, "step": 11448 }, { "epoch": 7.931416695531694, "grad_norm": 0.3428936302661896, "learning_rate": 2.067961165048544e-06, "loss": 0.0029, "step": 11449 }, { "epoch": 7.932109456182888, "grad_norm": 0.06243060529232025, "learning_rate": 2.0672676837725384e-06, "loss": 0.0012, "step": 11450 }, { "epoch": 7.932802216834084, "grad_norm": 0.1965719759464264, "learning_rate": 2.066574202496533e-06, "loss": 0.0018, "step": 11451 }, { "epoch": 7.933494977485279, "grad_norm": 0.2134178876876831, "learning_rate": 2.0658807212205274e-06, "loss": 0.0021, "step": 11452 }, { "epoch": 7.934187738136474, "grad_norm": 0.2270844727754593, "learning_rate": 2.0651872399445215e-06, "loss": 0.0017, "step": 11453 }, { "epoch": 7.934880498787669, "grad_norm": 0.07822228223085403, "learning_rate": 2.0644937586685164e-06, "loss": 0.0013, "step": 11454 }, { "epoch": 7.935573259438864, "grad_norm": 0.10275891423225403, "learning_rate": 2.0638002773925105e-06, "loss": 0.0016, "step": 11455 }, { "epoch": 7.936266020090059, "grad_norm": 0.16150107979774475, "learning_rate": 2.063106796116505e-06, "loss": 0.0018, "step": 11456 }, { "epoch": 7.936958780741254, "grad_norm": 0.16756756603717804, "learning_rate": 2.0624133148404995e-06, "loss": 0.0025, "step": 11457 }, { "epoch": 7.937651541392449, "grad_norm": 0.2795891761779785, "learning_rate": 2.061719833564494e-06, "loss": 0.002, "step": 11458 }, { "epoch": 7.938344302043644, "grad_norm": 0.16245059669017792, "learning_rate": 2.061026352288488e-06, "loss": 0.0018, "step": 11459 }, { "epoch": 7.9390370626948386, "grad_norm": 0.25377747416496277, "learning_rate": 2.060332871012483e-06, "loss": 0.0021, "step": 11460 }, { "epoch": 7.939729823346034, "grad_norm": 0.11814465373754501, "learning_rate": 2.059639389736477e-06, "loss": 0.0014, "step": 11461 }, { "epoch": 7.940422583997229, "grad_norm": 0.14700928330421448, "learning_rate": 2.0589459084604717e-06, "loss": 0.0026, "step": 11462 }, { "epoch": 7.9411153446484235, "grad_norm": 0.11324573308229446, "learning_rate": 2.058252427184466e-06, "loss": 0.0015, "step": 11463 }, { "epoch": 7.941808105299619, "grad_norm": 0.23448750376701355, "learning_rate": 2.0575589459084607e-06, "loss": 0.0031, "step": 11464 }, { "epoch": 7.942500865950814, "grad_norm": 0.09061235189437866, "learning_rate": 2.056865464632455e-06, "loss": 0.0015, "step": 11465 }, { "epoch": 7.943193626602009, "grad_norm": 0.33940520882606506, "learning_rate": 2.0561719833564493e-06, "loss": 0.0033, "step": 11466 }, { "epoch": 7.943886387253204, "grad_norm": 0.26225733757019043, "learning_rate": 2.055478502080444e-06, "loss": 0.0023, "step": 11467 }, { "epoch": 7.944579147904399, "grad_norm": 0.07459697127342224, "learning_rate": 2.0547850208044383e-06, "loss": 0.0015, "step": 11468 }, { "epoch": 7.945271908555594, "grad_norm": 0.06712120026350021, "learning_rate": 2.054091539528433e-06, "loss": 0.0012, "step": 11469 }, { "epoch": 7.945964669206789, "grad_norm": 0.20380155742168427, "learning_rate": 2.0533980582524273e-06, "loss": 0.0017, "step": 11470 }, { "epoch": 7.946657429857984, "grad_norm": 0.13156256079673767, "learning_rate": 2.052704576976422e-06, "loss": 0.0018, "step": 11471 }, { "epoch": 7.947350190509179, "grad_norm": 0.3585037589073181, "learning_rate": 2.0520110957004163e-06, "loss": 0.0045, "step": 11472 }, { "epoch": 7.9480429511603745, "grad_norm": 0.06246247887611389, "learning_rate": 2.051317614424411e-06, "loss": 0.0014, "step": 11473 }, { "epoch": 7.948735711811569, "grad_norm": 0.11869733035564423, "learning_rate": 2.050624133148405e-06, "loss": 0.0017, "step": 11474 }, { "epoch": 7.949428472462764, "grad_norm": 0.12826837599277496, "learning_rate": 2.0499306518724e-06, "loss": 0.0013, "step": 11475 }, { "epoch": 7.9501212331139595, "grad_norm": 0.07099510729312897, "learning_rate": 2.049237170596394e-06, "loss": 0.0015, "step": 11476 }, { "epoch": 7.950813993765154, "grad_norm": 0.0927015021443367, "learning_rate": 2.0485436893203885e-06, "loss": 0.0016, "step": 11477 }, { "epoch": 7.951506754416349, "grad_norm": 0.14832226932048798, "learning_rate": 2.047850208044383e-06, "loss": 0.002, "step": 11478 }, { "epoch": 7.952199515067544, "grad_norm": 0.1016480028629303, "learning_rate": 2.0471567267683775e-06, "loss": 0.0014, "step": 11479 }, { "epoch": 7.952892275718739, "grad_norm": 0.08733993023633957, "learning_rate": 2.046463245492372e-06, "loss": 0.0013, "step": 11480 }, { "epoch": 7.953585036369934, "grad_norm": 0.18449991941452026, "learning_rate": 2.045769764216366e-06, "loss": 0.002, "step": 11481 }, { "epoch": 7.954277797021129, "grad_norm": 0.278288334608078, "learning_rate": 2.045076282940361e-06, "loss": 0.0019, "step": 11482 }, { "epoch": 7.954970557672324, "grad_norm": 0.0932365208864212, "learning_rate": 2.044382801664355e-06, "loss": 0.0015, "step": 11483 }, { "epoch": 7.955663318323519, "grad_norm": 0.2478647381067276, "learning_rate": 2.0436893203883496e-06, "loss": 0.0021, "step": 11484 }, { "epoch": 7.956356078974714, "grad_norm": 0.08164886385202408, "learning_rate": 2.042995839112344e-06, "loss": 0.0013, "step": 11485 }, { "epoch": 7.95704883962591, "grad_norm": 0.5557247996330261, "learning_rate": 2.0423023578363386e-06, "loss": 0.0064, "step": 11486 }, { "epoch": 7.957741600277104, "grad_norm": 0.08273177593946457, "learning_rate": 2.041608876560333e-06, "loss": 0.0014, "step": 11487 }, { "epoch": 7.958434360928299, "grad_norm": 0.109470434486866, "learning_rate": 2.0409153952843276e-06, "loss": 0.0018, "step": 11488 }, { "epoch": 7.9591271215794945, "grad_norm": 0.08045624941587448, "learning_rate": 2.0402219140083217e-06, "loss": 0.0014, "step": 11489 }, { "epoch": 7.959819882230689, "grad_norm": 0.11285190284252167, "learning_rate": 2.0395284327323162e-06, "loss": 0.0014, "step": 11490 }, { "epoch": 7.960512642881884, "grad_norm": 0.08281496912240982, "learning_rate": 2.0388349514563107e-06, "loss": 0.0014, "step": 11491 }, { "epoch": 7.9612054035330795, "grad_norm": 0.25705453753471375, "learning_rate": 2.0381414701803053e-06, "loss": 0.0026, "step": 11492 }, { "epoch": 7.961898164184275, "grad_norm": 0.07038560509681702, "learning_rate": 2.0374479889042998e-06, "loss": 0.0012, "step": 11493 }, { "epoch": 7.962590924835469, "grad_norm": 0.08487432450056076, "learning_rate": 2.0367545076282943e-06, "loss": 0.0013, "step": 11494 }, { "epoch": 7.963283685486664, "grad_norm": 0.07545677572488785, "learning_rate": 2.0360610263522888e-06, "loss": 0.0015, "step": 11495 }, { "epoch": 7.96397644613786, "grad_norm": 0.09319596737623215, "learning_rate": 2.035367545076283e-06, "loss": 0.0016, "step": 11496 }, { "epoch": 7.964669206789054, "grad_norm": 0.17455600202083588, "learning_rate": 2.034674063800278e-06, "loss": 0.0027, "step": 11497 }, { "epoch": 7.965361967440249, "grad_norm": 0.09785149991512299, "learning_rate": 2.033980582524272e-06, "loss": 0.0016, "step": 11498 }, { "epoch": 7.966054728091445, "grad_norm": 0.21693672239780426, "learning_rate": 2.0332871012482664e-06, "loss": 0.0023, "step": 11499 }, { "epoch": 7.966747488742639, "grad_norm": 0.15302878618240356, "learning_rate": 2.032593619972261e-06, "loss": 0.002, "step": 11500 }, { "epoch": 7.967440249393834, "grad_norm": 0.15403737127780914, "learning_rate": 2.0319001386962554e-06, "loss": 0.0018, "step": 11501 }, { "epoch": 7.96813301004503, "grad_norm": 0.2522105276584625, "learning_rate": 2.03120665742025e-06, "loss": 0.0025, "step": 11502 }, { "epoch": 7.968825770696224, "grad_norm": 0.10669081658124924, "learning_rate": 2.0305131761442444e-06, "loss": 0.0017, "step": 11503 }, { "epoch": 7.969518531347419, "grad_norm": 0.06838635355234146, "learning_rate": 2.0298196948682385e-06, "loss": 0.0011, "step": 11504 }, { "epoch": 7.9702112919986146, "grad_norm": 0.32742977142333984, "learning_rate": 2.029126213592233e-06, "loss": 0.002, "step": 11505 }, { "epoch": 7.97090405264981, "grad_norm": 0.40055006742477417, "learning_rate": 2.0284327323162275e-06, "loss": 0.0019, "step": 11506 }, { "epoch": 7.971596813301004, "grad_norm": 0.12538351118564606, "learning_rate": 2.027739251040222e-06, "loss": 0.0018, "step": 11507 }, { "epoch": 7.9722895739521995, "grad_norm": 0.07275067269802094, "learning_rate": 2.0270457697642166e-06, "loss": 0.0014, "step": 11508 }, { "epoch": 7.972982334603395, "grad_norm": 0.19842331111431122, "learning_rate": 2.0263522884882107e-06, "loss": 0.0024, "step": 11509 }, { "epoch": 7.973675095254589, "grad_norm": 0.07606042176485062, "learning_rate": 2.0256588072122056e-06, "loss": 0.0014, "step": 11510 }, { "epoch": 7.9743678559057845, "grad_norm": 0.09152119606733322, "learning_rate": 2.0249653259361997e-06, "loss": 0.0012, "step": 11511 }, { "epoch": 7.97506061655698, "grad_norm": 0.12996184825897217, "learning_rate": 2.0242718446601946e-06, "loss": 0.0016, "step": 11512 }, { "epoch": 7.975753377208175, "grad_norm": 0.08830322325229645, "learning_rate": 2.0235783633841887e-06, "loss": 0.0016, "step": 11513 }, { "epoch": 7.976446137859369, "grad_norm": 0.08426600694656372, "learning_rate": 2.022884882108183e-06, "loss": 0.0014, "step": 11514 }, { "epoch": 7.977138898510565, "grad_norm": 0.1549033224582672, "learning_rate": 2.0221914008321777e-06, "loss": 0.0018, "step": 11515 }, { "epoch": 7.97783165916176, "grad_norm": 0.22533456981182098, "learning_rate": 2.0214979195561722e-06, "loss": 0.0021, "step": 11516 }, { "epoch": 7.978524419812954, "grad_norm": 0.08146145939826965, "learning_rate": 2.0208044382801667e-06, "loss": 0.0015, "step": 11517 }, { "epoch": 7.97921718046415, "grad_norm": 0.13984590768814087, "learning_rate": 2.0201109570041612e-06, "loss": 0.0017, "step": 11518 }, { "epoch": 7.979909941115345, "grad_norm": 0.18048150837421417, "learning_rate": 2.0194174757281553e-06, "loss": 0.0017, "step": 11519 }, { "epoch": 7.980602701766539, "grad_norm": 0.14031150937080383, "learning_rate": 2.01872399445215e-06, "loss": 0.0014, "step": 11520 }, { "epoch": 7.981295462417735, "grad_norm": 0.1632668375968933, "learning_rate": 2.0180305131761443e-06, "loss": 0.0017, "step": 11521 }, { "epoch": 7.98198822306893, "grad_norm": 0.11185518652200699, "learning_rate": 2.017337031900139e-06, "loss": 0.0016, "step": 11522 }, { "epoch": 7.982680983720124, "grad_norm": 0.11018916964530945, "learning_rate": 2.0166435506241334e-06, "loss": 0.0017, "step": 11523 }, { "epoch": 7.9833737443713195, "grad_norm": 0.819999098777771, "learning_rate": 2.0159500693481275e-06, "loss": 0.0022, "step": 11524 }, { "epoch": 7.984066505022515, "grad_norm": 0.15081410109996796, "learning_rate": 2.0152565880721224e-06, "loss": 0.0033, "step": 11525 }, { "epoch": 7.98475926567371, "grad_norm": 0.48248326778411865, "learning_rate": 2.0145631067961165e-06, "loss": 0.0025, "step": 11526 }, { "epoch": 7.9854520263249045, "grad_norm": 0.13126465678215027, "learning_rate": 2.0138696255201114e-06, "loss": 0.0016, "step": 11527 }, { "epoch": 7.9861447869761, "grad_norm": 0.08971597254276276, "learning_rate": 2.0131761442441055e-06, "loss": 0.0015, "step": 11528 }, { "epoch": 7.986837547627295, "grad_norm": 0.1712157279253006, "learning_rate": 2.0124826629681e-06, "loss": 0.0016, "step": 11529 }, { "epoch": 7.987530308278489, "grad_norm": 0.1463373899459839, "learning_rate": 2.0117891816920945e-06, "loss": 0.0019, "step": 11530 }, { "epoch": 7.988223068929685, "grad_norm": 0.1179339662194252, "learning_rate": 2.011095700416089e-06, "loss": 0.0013, "step": 11531 }, { "epoch": 7.98891582958088, "grad_norm": 0.29913946986198425, "learning_rate": 2.0104022191400835e-06, "loss": 0.0016, "step": 11532 }, { "epoch": 7.989608590232075, "grad_norm": 0.0902574211359024, "learning_rate": 2.009708737864078e-06, "loss": 0.0014, "step": 11533 }, { "epoch": 7.99030135088327, "grad_norm": 0.08017268776893616, "learning_rate": 2.009015256588072e-06, "loss": 0.0014, "step": 11534 }, { "epoch": 7.990994111534465, "grad_norm": 0.15926645696163177, "learning_rate": 2.0083217753120666e-06, "loss": 0.0018, "step": 11535 }, { "epoch": 7.99168687218566, "grad_norm": 0.14761313796043396, "learning_rate": 2.007628294036061e-06, "loss": 0.0019, "step": 11536 }, { "epoch": 7.992379632836855, "grad_norm": 0.21028392016887665, "learning_rate": 2.0069348127600557e-06, "loss": 0.0018, "step": 11537 }, { "epoch": 7.99307239348805, "grad_norm": 0.17841485142707825, "learning_rate": 2.00624133148405e-06, "loss": 0.0039, "step": 11538 }, { "epoch": 7.993765154139245, "grad_norm": 0.08202453702688217, "learning_rate": 2.0055478502080443e-06, "loss": 0.0013, "step": 11539 }, { "epoch": 7.9944579147904395, "grad_norm": 0.1495659202337265, "learning_rate": 2.004854368932039e-06, "loss": 0.0016, "step": 11540 }, { "epoch": 7.995150675441635, "grad_norm": 0.4311334788799286, "learning_rate": 2.0041608876560333e-06, "loss": 0.0028, "step": 11541 }, { "epoch": 7.99584343609283, "grad_norm": 0.10675432533025742, "learning_rate": 2.003467406380028e-06, "loss": 0.0017, "step": 11542 }, { "epoch": 7.9965361967440245, "grad_norm": 0.12781338393688202, "learning_rate": 2.0027739251040223e-06, "loss": 0.002, "step": 11543 }, { "epoch": 7.99722895739522, "grad_norm": 0.1689104437828064, "learning_rate": 2.002080443828017e-06, "loss": 0.0022, "step": 11544 }, { "epoch": 7.997921718046415, "grad_norm": 0.1593383103609085, "learning_rate": 2.0013869625520113e-06, "loss": 0.0021, "step": 11545 }, { "epoch": 7.99861447869761, "grad_norm": 0.2896552085876465, "learning_rate": 2.000693481276006e-06, "loss": 0.0018, "step": 11546 }, { "epoch": 7.999307239348805, "grad_norm": 0.08897218853235245, "learning_rate": 2.0000000000000003e-06, "loss": 0.0015, "step": 11547 }, { "epoch": 8.0, "grad_norm": 0.16805683076381683, "learning_rate": 1.9993065187239944e-06, "loss": 0.0025, "step": 11548 }, { "epoch": 8.0, "eval_loss": 0.31035521626472473, "eval_runtime": 7617.722, "eval_samples_per_second": 1.05, "eval_steps_per_second": 0.033, "eval_wer": 12.103847094587513, "step": 11548 } ], "logging_steps": 1, "max_steps": 14430, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.277245051186708e+21, "train_batch_size": 80, "trial_name": null, "trial_params": null }