{ "best_metric": 0.7291744947433472, "best_model_checkpoint": "models_gitignored/bert-base-uncased-finetuned-sentence-classification/checkpoint-25252", "epoch": 3.0, "global_step": 75756, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.9920798352605737e-05, "loss": 1.2823, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.984159670521147e-05, "loss": 1.0188, "step": 1000 }, { "epoch": 0.06, "learning_rate": 1.9762395057817204e-05, "loss": 0.918, "step": 1500 }, { "epoch": 0.08, "learning_rate": 1.968319341042294e-05, "loss": 0.8996, "step": 2000 }, { "epoch": 0.1, "learning_rate": 1.9603991763028674e-05, "loss": 0.8202, "step": 2500 }, { "epoch": 0.12, "learning_rate": 1.9524790115634406e-05, "loss": 0.8602, "step": 3000 }, { "epoch": 0.14, "learning_rate": 1.944558846824014e-05, "loss": 0.8396, "step": 3500 }, { "epoch": 0.16, "learning_rate": 1.9366386820845877e-05, "loss": 0.8208, "step": 4000 }, { "epoch": 0.18, "learning_rate": 1.9287185173451612e-05, "loss": 0.8678, "step": 4500 }, { "epoch": 0.2, "learning_rate": 1.9207983526057344e-05, "loss": 0.7868, "step": 5000 }, { "epoch": 0.22, "learning_rate": 1.912878187866308e-05, "loss": 0.8267, "step": 5500 }, { "epoch": 0.24, "learning_rate": 1.904958023126881e-05, "loss": 0.7753, "step": 6000 }, { "epoch": 0.26, "learning_rate": 1.8970378583874546e-05, "loss": 0.7755, "step": 6500 }, { "epoch": 0.28, "learning_rate": 1.889117693648028e-05, "loss": 0.7856, "step": 7000 }, { "epoch": 0.3, "learning_rate": 1.8811975289086016e-05, "loss": 0.8204, "step": 7500 }, { "epoch": 0.32, "learning_rate": 1.8732773641691748e-05, "loss": 0.8075, "step": 8000 }, { "epoch": 0.34, "learning_rate": 1.8653571994297483e-05, "loss": 0.7782, "step": 8500 }, { "epoch": 0.36, "learning_rate": 1.8574370346903215e-05, "loss": 0.7993, "step": 9000 }, { "epoch": 0.38, "learning_rate": 1.849516869950895e-05, "loss": 0.785, "step": 9500 }, { "epoch": 0.4, "learning_rate": 1.8415967052114686e-05, "loss": 0.7602, "step": 10000 }, { "epoch": 0.42, "learning_rate": 1.833676540472042e-05, "loss": 0.7536, "step": 10500 }, { "epoch": 0.44, "learning_rate": 1.8257563757326153e-05, "loss": 0.763, "step": 11000 }, { "epoch": 0.46, "learning_rate": 1.8178362109931888e-05, "loss": 0.7935, "step": 11500 }, { "epoch": 0.48, "learning_rate": 1.8099160462537623e-05, "loss": 0.7566, "step": 12000 }, { "epoch": 0.5, "learning_rate": 1.8019958815143358e-05, "loss": 0.7797, "step": 12500 }, { "epoch": 0.51, "learning_rate": 1.794075716774909e-05, "loss": 0.7548, "step": 13000 }, { "epoch": 0.53, "learning_rate": 1.7861555520354825e-05, "loss": 0.7658, "step": 13500 }, { "epoch": 0.55, "learning_rate": 1.7782353872960557e-05, "loss": 0.7945, "step": 14000 }, { "epoch": 0.57, "learning_rate": 1.7703152225566292e-05, "loss": 0.779, "step": 14500 }, { "epoch": 0.59, "learning_rate": 1.7623950578172028e-05, "loss": 0.7521, "step": 15000 }, { "epoch": 0.61, "learning_rate": 1.7544748930777763e-05, "loss": 0.745, "step": 15500 }, { "epoch": 0.63, "learning_rate": 1.7465547283383495e-05, "loss": 0.7792, "step": 16000 }, { "epoch": 0.65, "learning_rate": 1.738634563598923e-05, "loss": 0.7402, "step": 16500 }, { "epoch": 0.67, "learning_rate": 1.730714398859496e-05, "loss": 0.7552, "step": 17000 }, { "epoch": 0.69, "learning_rate": 1.7227942341200697e-05, "loss": 0.7849, "step": 17500 }, { "epoch": 0.71, "learning_rate": 1.7148740693806432e-05, "loss": 0.7717, "step": 18000 }, { "epoch": 0.73, "learning_rate": 1.7069539046412167e-05, "loss": 0.7652, "step": 18500 }, { "epoch": 0.75, "learning_rate": 1.69903373990179e-05, "loss": 0.7521, "step": 19000 }, { "epoch": 0.77, "learning_rate": 1.6911135751623634e-05, "loss": 0.7684, "step": 19500 }, { "epoch": 0.79, "learning_rate": 1.683193410422937e-05, "loss": 0.7492, "step": 20000 }, { "epoch": 0.81, "learning_rate": 1.6752732456835105e-05, "loss": 0.7718, "step": 20500 }, { "epoch": 0.83, "learning_rate": 1.667353080944084e-05, "loss": 0.7287, "step": 21000 }, { "epoch": 0.85, "learning_rate": 1.6594329162046572e-05, "loss": 0.7685, "step": 21500 }, { "epoch": 0.87, "learning_rate": 1.6515127514652307e-05, "loss": 0.7597, "step": 22000 }, { "epoch": 0.89, "learning_rate": 1.643592586725804e-05, "loss": 0.7373, "step": 22500 }, { "epoch": 0.91, "learning_rate": 1.6356724219863774e-05, "loss": 0.7615, "step": 23000 }, { "epoch": 0.93, "learning_rate": 1.627752257246951e-05, "loss": 0.7356, "step": 23500 }, { "epoch": 0.95, "learning_rate": 1.6198320925075244e-05, "loss": 0.7738, "step": 24000 }, { "epoch": 0.97, "learning_rate": 1.6119119277680976e-05, "loss": 0.7424, "step": 24500 }, { "epoch": 0.99, "learning_rate": 1.603991763028671e-05, "loss": 0.728, "step": 25000 }, { "epoch": 1.0, "eval_accuracy": 0.7828767360509818, "eval_f1": 0.7791440516416176, "eval_kappa": 0.7055247162851201, "eval_loss": 0.7291744947433472, "eval_precision": 0.7811546650503057, "eval_recall": 0.7828767360509818, "eval_runtime": 304.9713, "eval_samples_per_second": 94.674, "eval_steps_per_second": 23.671, "step": 25252 }, { "epoch": 1.01, "learning_rate": 1.5960715982892447e-05, "loss": 0.659, "step": 25500 }, { "epoch": 1.03, "learning_rate": 1.5881514335498182e-05, "loss": 0.5986, "step": 26000 }, { "epoch": 1.05, "learning_rate": 1.5802312688103914e-05, "loss": 0.6172, "step": 26500 }, { "epoch": 1.07, "learning_rate": 1.572311104070965e-05, "loss": 0.6544, "step": 27000 }, { "epoch": 1.09, "learning_rate": 1.564390939331538e-05, "loss": 0.6441, "step": 27500 }, { "epoch": 1.11, "learning_rate": 1.5564707745921116e-05, "loss": 0.604, "step": 28000 }, { "epoch": 1.13, "learning_rate": 1.548550609852685e-05, "loss": 0.5808, "step": 28500 }, { "epoch": 1.15, "learning_rate": 1.5406304451132586e-05, "loss": 0.6685, "step": 29000 }, { "epoch": 1.17, "learning_rate": 1.5327102803738318e-05, "loss": 0.6581, "step": 29500 }, { "epoch": 1.19, "learning_rate": 1.5247901156344053e-05, "loss": 0.6344, "step": 30000 }, { "epoch": 1.21, "learning_rate": 1.5168699508949787e-05, "loss": 0.6548, "step": 30500 }, { "epoch": 1.23, "learning_rate": 1.5089497861555522e-05, "loss": 0.6726, "step": 31000 }, { "epoch": 1.25, "learning_rate": 1.5010296214161256e-05, "loss": 0.6694, "step": 31500 }, { "epoch": 1.27, "learning_rate": 1.4931094566766991e-05, "loss": 0.6791, "step": 32000 }, { "epoch": 1.29, "learning_rate": 1.4851892919372723e-05, "loss": 0.6322, "step": 32500 }, { "epoch": 1.31, "learning_rate": 1.4772691271978458e-05, "loss": 0.648, "step": 33000 }, { "epoch": 1.33, "learning_rate": 1.4693489624584191e-05, "loss": 0.6643, "step": 33500 }, { "epoch": 1.35, "learning_rate": 1.4614287977189927e-05, "loss": 0.6545, "step": 34000 }, { "epoch": 1.37, "learning_rate": 1.453508632979566e-05, "loss": 0.5887, "step": 34500 }, { "epoch": 1.39, "learning_rate": 1.4455884682401395e-05, "loss": 0.6611, "step": 35000 }, { "epoch": 1.41, "learning_rate": 1.4376683035007129e-05, "loss": 0.6729, "step": 35500 }, { "epoch": 1.43, "learning_rate": 1.4297481387612864e-05, "loss": 0.631, "step": 36000 }, { "epoch": 1.45, "learning_rate": 1.4218279740218596e-05, "loss": 0.6706, "step": 36500 }, { "epoch": 1.47, "learning_rate": 1.4139078092824333e-05, "loss": 0.6433, "step": 37000 }, { "epoch": 1.49, "learning_rate": 1.4059876445430065e-05, "loss": 0.5823, "step": 37500 }, { "epoch": 1.5, "learning_rate": 1.39806747980358e-05, "loss": 0.6519, "step": 38000 }, { "epoch": 1.52, "learning_rate": 1.3901473150641533e-05, "loss": 0.644, "step": 38500 }, { "epoch": 1.54, "learning_rate": 1.3822271503247269e-05, "loss": 0.6669, "step": 39000 }, { "epoch": 1.56, "learning_rate": 1.3743069855853002e-05, "loss": 0.6936, "step": 39500 }, { "epoch": 1.58, "learning_rate": 1.3663868208458737e-05, "loss": 0.6298, "step": 40000 }, { "epoch": 1.6, "learning_rate": 1.3584666561064473e-05, "loss": 0.7215, "step": 40500 }, { "epoch": 1.62, "learning_rate": 1.3505464913670206e-05, "loss": 0.6555, "step": 41000 }, { "epoch": 1.64, "learning_rate": 1.3426263266275941e-05, "loss": 0.6789, "step": 41500 }, { "epoch": 1.66, "learning_rate": 1.3347061618881673e-05, "loss": 0.6386, "step": 42000 }, { "epoch": 1.68, "learning_rate": 1.3267859971487408e-05, "loss": 0.593, "step": 42500 }, { "epoch": 1.7, "learning_rate": 1.3188658324093142e-05, "loss": 0.6504, "step": 43000 }, { "epoch": 1.72, "learning_rate": 1.3109456676698877e-05, "loss": 0.6565, "step": 43500 }, { "epoch": 1.74, "learning_rate": 1.303025502930461e-05, "loss": 0.6541, "step": 44000 }, { "epoch": 1.76, "learning_rate": 1.2951053381910346e-05, "loss": 0.6761, "step": 44500 }, { "epoch": 1.78, "learning_rate": 1.287185173451608e-05, "loss": 0.658, "step": 45000 }, { "epoch": 1.8, "learning_rate": 1.2792650087121815e-05, "loss": 0.6487, "step": 45500 }, { "epoch": 1.82, "learning_rate": 1.2713448439727546e-05, "loss": 0.6753, "step": 46000 }, { "epoch": 1.84, "learning_rate": 1.2634246792333282e-05, "loss": 0.64, "step": 46500 }, { "epoch": 1.86, "learning_rate": 1.2555045144939015e-05, "loss": 0.6226, "step": 47000 }, { "epoch": 1.88, "learning_rate": 1.247584349754475e-05, "loss": 0.7033, "step": 47500 }, { "epoch": 1.9, "learning_rate": 1.2396641850150484e-05, "loss": 0.6832, "step": 48000 }, { "epoch": 1.92, "learning_rate": 1.2317440202756219e-05, "loss": 0.6237, "step": 48500 }, { "epoch": 1.94, "learning_rate": 1.2238238555361953e-05, "loss": 0.6542, "step": 49000 }, { "epoch": 1.96, "learning_rate": 1.2159036907967688e-05, "loss": 0.6677, "step": 49500 }, { "epoch": 1.98, "learning_rate": 1.207983526057342e-05, "loss": 0.6236, "step": 50000 }, { "epoch": 2.0, "learning_rate": 1.2000633613179155e-05, "loss": 0.6123, "step": 50500 }, { "epoch": 2.0, "eval_accuracy": 0.781352820974613, "eval_f1": 0.779187798721889, "eval_kappa": 0.7064476335084646, "eval_loss": 0.8750618696212769, "eval_precision": 0.7867337948169639, "eval_recall": 0.781352820974613, "eval_runtime": 303.7391, "eval_samples_per_second": 95.059, "eval_steps_per_second": 23.767, "step": 50504 }, { "epoch": 2.02, "learning_rate": 1.1921431965784888e-05, "loss": 0.453, "step": 51000 }, { "epoch": 2.04, "learning_rate": 1.1842230318390624e-05, "loss": 0.5324, "step": 51500 }, { "epoch": 2.06, "learning_rate": 1.1763028670996357e-05, "loss": 0.4745, "step": 52000 }, { "epoch": 2.08, "learning_rate": 1.1683827023602092e-05, "loss": 0.5104, "step": 52500 }, { "epoch": 2.1, "learning_rate": 1.1604625376207826e-05, "loss": 0.5623, "step": 53000 }, { "epoch": 2.12, "learning_rate": 1.1525423728813561e-05, "loss": 0.4998, "step": 53500 }, { "epoch": 2.14, "learning_rate": 1.1446222081419293e-05, "loss": 0.5157, "step": 54000 }, { "epoch": 2.16, "learning_rate": 1.136702043402503e-05, "loss": 0.4614, "step": 54500 }, { "epoch": 2.18, "learning_rate": 1.1287818786630762e-05, "loss": 0.5011, "step": 55000 }, { "epoch": 2.2, "learning_rate": 1.1208617139236497e-05, "loss": 0.5621, "step": 55500 }, { "epoch": 2.22, "learning_rate": 1.112941549184223e-05, "loss": 0.517, "step": 56000 }, { "epoch": 2.24, "learning_rate": 1.1050213844447966e-05, "loss": 0.5152, "step": 56500 }, { "epoch": 2.26, "learning_rate": 1.0971012197053699e-05, "loss": 0.5192, "step": 57000 }, { "epoch": 2.28, "learning_rate": 1.0891810549659434e-05, "loss": 0.474, "step": 57500 }, { "epoch": 2.3, "learning_rate": 1.0812608902265168e-05, "loss": 0.5443, "step": 58000 }, { "epoch": 2.32, "learning_rate": 1.0733407254870903e-05, "loss": 0.4717, "step": 58500 }, { "epoch": 2.34, "learning_rate": 1.0654205607476635e-05, "loss": 0.5105, "step": 59000 }, { "epoch": 2.36, "learning_rate": 1.057500396008237e-05, "loss": 0.5299, "step": 59500 }, { "epoch": 2.38, "learning_rate": 1.0495802312688104e-05, "loss": 0.5105, "step": 60000 }, { "epoch": 2.4, "learning_rate": 1.0416600665293839e-05, "loss": 0.5919, "step": 60500 }, { "epoch": 2.42, "learning_rate": 1.0337399017899574e-05, "loss": 0.5423, "step": 61000 }, { "epoch": 2.44, "learning_rate": 1.0258197370505308e-05, "loss": 0.4779, "step": 61500 }, { "epoch": 2.46, "learning_rate": 1.0178995723111043e-05, "loss": 0.5086, "step": 62000 }, { "epoch": 2.48, "learning_rate": 1.0099794075716776e-05, "loss": 0.5192, "step": 62500 }, { "epoch": 2.49, "learning_rate": 1.0020592428322511e-05, "loss": 0.5096, "step": 63000 }, { "epoch": 2.51, "learning_rate": 9.941390780928243e-06, "loss": 0.5558, "step": 63500 }, { "epoch": 2.53, "learning_rate": 9.862189133533979e-06, "loss": 0.5074, "step": 64000 }, { "epoch": 2.55, "learning_rate": 9.782987486139712e-06, "loss": 0.5131, "step": 64500 }, { "epoch": 2.57, "learning_rate": 9.703785838745447e-06, "loss": 0.5521, "step": 65000 }, { "epoch": 2.59, "learning_rate": 9.62458419135118e-06, "loss": 0.4883, "step": 65500 }, { "epoch": 2.61, "learning_rate": 9.545382543956914e-06, "loss": 0.5142, "step": 66000 }, { "epoch": 2.63, "learning_rate": 9.46618089656265e-06, "loss": 0.5255, "step": 66500 }, { "epoch": 2.65, "learning_rate": 9.386979249168383e-06, "loss": 0.5168, "step": 67000 }, { "epoch": 2.67, "learning_rate": 9.307777601774117e-06, "loss": 0.5303, "step": 67500 }, { "epoch": 2.69, "learning_rate": 9.228575954379852e-06, "loss": 0.4962, "step": 68000 }, { "epoch": 2.71, "learning_rate": 9.149374306985585e-06, "loss": 0.531, "step": 68500 }, { "epoch": 2.73, "learning_rate": 9.07017265959132e-06, "loss": 0.5689, "step": 69000 }, { "epoch": 2.75, "learning_rate": 8.990971012197054e-06, "loss": 0.547, "step": 69500 }, { "epoch": 2.77, "learning_rate": 8.911769364802788e-06, "loss": 0.5206, "step": 70000 }, { "epoch": 2.79, "learning_rate": 8.832567717408523e-06, "loss": 0.5492, "step": 70500 }, { "epoch": 2.81, "learning_rate": 8.753366070014258e-06, "loss": 0.5338, "step": 71000 }, { "epoch": 2.83, "learning_rate": 8.674164422619991e-06, "loss": 0.5102, "step": 71500 }, { "epoch": 2.85, "learning_rate": 8.594962775225727e-06, "loss": 0.5333, "step": 72000 }, { "epoch": 2.87, "learning_rate": 8.51576112783146e-06, "loss": 0.4985, "step": 72500 }, { "epoch": 2.89, "learning_rate": 8.436559480437194e-06, "loss": 0.4929, "step": 73000 }, { "epoch": 2.91, "learning_rate": 8.357357833042929e-06, "loss": 0.4884, "step": 73500 }, { "epoch": 2.93, "learning_rate": 8.278156185648662e-06, "loss": 0.5154, "step": 74000 }, { "epoch": 2.95, "learning_rate": 8.198954538254396e-06, "loss": 0.5431, "step": 74500 }, { "epoch": 2.97, "learning_rate": 8.119752890860131e-06, "loss": 0.4838, "step": 75000 }, { "epoch": 2.99, "learning_rate": 8.040551243465865e-06, "loss": 0.4605, "step": 75500 }, { "epoch": 3.0, "eval_accuracy": 0.7850587053648738, "eval_f1": 0.7838888247253561, "eval_kappa": 0.7119236186647375, "eval_loss": 0.9557045102119446, "eval_precision": 0.7851328181797262, "eval_recall": 0.7850587053648738, "eval_runtime": 304.9153, "eval_samples_per_second": 94.692, "eval_steps_per_second": 23.675, "step": 75756 } ], "max_steps": 126260, "num_train_epochs": 5, "total_flos": 7.766798662066938e+16, "trial_name": null, "trial_params": null }