gokuls committed on
Commit
7670d1c
1 Parent(s): 8b57369

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,31 @@
1
  ---
 
 
2
  license: apache-2.0
3
  tags:
4
  - generated_from_trainer
 
 
5
  metrics:
6
  - accuracy
7
  - f1
8
  model-index:
9
  - name: mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,12 +33,12 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc
17
 
18
- This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.1274
21
- - Accuracy: 0.9902
22
- - F1: 0.9929
23
- - Combined Score: 0.9915
24
 
25
  ## Model description
26
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - glue
9
  metrics:
10
  - accuracy
11
  - f1
12
  model-index:
13
  - name: mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE MRPC
20
+ type: glue
21
+ args: mrpc
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.9877450980392157
26
+ - name: F1
27
+ type: f1
28
+ value: 0.9911190053285969
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  # mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc
35
 
36
+ This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the GLUE MRPC dataset.
37
  It achieves the following results on the evaluation set:
38
+ - Loss: 0.1256
39
+ - Accuracy: 0.9877
40
+ - F1: 0.9911
41
+ - Combined Score: 0.9894
42
 
43
  ## Model description
44
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "eval_accuracy": 0.9877450980392157,
4
+ "eval_combined_score": 0.9894320516839064,
5
+ "eval_f1": 0.9911190053285969,
6
+ "eval_loss": 0.1256455034017563,
7
+ "eval_runtime": 0.7942,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 513.756,
10
+ "eval_steps_per_second": 5.037,
11
+ "train_loss": 0.19228331322046327,
12
+ "train_runtime": 51035.534,
13
+ "train_samples": 250736,
14
+ "train_samples_per_second": 245.648,
15
+ "train_steps_per_second": 1.919
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "eval_accuracy": 0.9877450980392157,
4
+ "eval_combined_score": 0.9894320516839064,
5
+ "eval_f1": 0.9911190053285969,
6
+ "eval_loss": 0.1256455034017563,
7
+ "eval_runtime": 0.7942,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 513.756,
10
+ "eval_steps_per_second": 5.037
11
+ }
logs/events.out.tfevents.1675349089.serv-3334.808655.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bbb5174e6b011d58f9fe88b25651ec80cd4be2f5205cfbe71b4340f2f319ff7
3
+ size 475
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "train_loss": 0.19228331322046327,
4
+ "train_runtime": 51035.534,
5
+ "train_samples": 250736,
6
+ "train_samples_per_second": 245.648,
7
+ "train_steps_per_second": 1.919
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,807 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1256455034017563,
3
+ "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc/checkpoint-80319",
4
+ "epoch": 46.0,
5
+ "global_step": 90114,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 4.9e-05,
13
+ "loss": 0.2964,
14
+ "step": 1959
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.9607843137254902,
19
+ "eval_combined_score": 0.9663076498204916,
20
+ "eval_f1": 0.9718309859154929,
21
+ "eval_loss": 0.20259901881217957,
22
+ "eval_runtime": 0.7823,
23
+ "eval_samples_per_second": 521.547,
24
+ "eval_steps_per_second": 5.113,
25
+ "step": 1959
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "learning_rate": 4.8e-05,
30
+ "loss": 0.2307,
31
+ "step": 3918
32
+ },
33
+ {
34
+ "epoch": 2.0,
35
+ "eval_accuracy": 0.9705882352941176,
36
+ "eval_combined_score": 0.9747678018575852,
37
+ "eval_f1": 0.9789473684210526,
38
+ "eval_loss": 0.19426828622817993,
39
+ "eval_runtime": 0.7765,
40
+ "eval_samples_per_second": 525.442,
41
+ "eval_steps_per_second": 5.151,
42
+ "step": 3918
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "learning_rate": 4.7e-05,
47
+ "loss": 0.2221,
48
+ "step": 5877
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.9803921568627451,
53
+ "eval_combined_score": 0.9831038798498122,
54
+ "eval_f1": 0.9858156028368794,
55
+ "eval_loss": 0.18736393749713898,
56
+ "eval_runtime": 0.773,
57
+ "eval_samples_per_second": 527.78,
58
+ "eval_steps_per_second": 5.174,
59
+ "step": 5877
60
+ },
61
+ {
62
+ "epoch": 4.0,
63
+ "learning_rate": 4.600000000000001e-05,
64
+ "loss": 0.2163,
65
+ "step": 7836
66
+ },
67
+ {
68
+ "epoch": 4.0,
69
+ "eval_accuracy": 0.9852941176470589,
70
+ "eval_combined_score": 0.9873279098873593,
71
+ "eval_f1": 0.9893617021276596,
72
+ "eval_loss": 0.17034168541431427,
73
+ "eval_runtime": 0.7757,
74
+ "eval_samples_per_second": 526.002,
75
+ "eval_steps_per_second": 5.157,
76
+ "step": 7836
77
+ },
78
+ {
79
+ "epoch": 5.0,
80
+ "learning_rate": 4.5e-05,
81
+ "loss": 0.2115,
82
+ "step": 9795
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "eval_accuracy": 0.9852941176470589,
87
+ "eval_combined_score": 0.9873279098873593,
88
+ "eval_f1": 0.9893617021276596,
89
+ "eval_loss": 0.18048794567584991,
90
+ "eval_runtime": 0.7764,
91
+ "eval_samples_per_second": 525.485,
92
+ "eval_steps_per_second": 5.152,
93
+ "step": 9795
94
+ },
95
+ {
96
+ "epoch": 6.0,
97
+ "learning_rate": 4.4000000000000006e-05,
98
+ "loss": 0.2071,
99
+ "step": 11754
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "eval_accuracy": 0.9803921568627451,
104
+ "eval_combined_score": 0.9831289406221853,
105
+ "eval_f1": 0.9858657243816255,
106
+ "eval_loss": 0.16820354759693146,
107
+ "eval_runtime": 0.7809,
108
+ "eval_samples_per_second": 522.481,
109
+ "eval_steps_per_second": 5.122,
110
+ "step": 11754
111
+ },
112
+ {
113
+ "epoch": 7.0,
114
+ "learning_rate": 4.3e-05,
115
+ "loss": 0.2036,
116
+ "step": 13713
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_accuracy": 0.9877450980392157,
121
+ "eval_combined_score": 0.9894320516839064,
122
+ "eval_f1": 0.9911190053285969,
123
+ "eval_loss": 0.15831029415130615,
124
+ "eval_runtime": 0.78,
125
+ "eval_samples_per_second": 523.105,
126
+ "eval_steps_per_second": 5.128,
127
+ "step": 13713
128
+ },
129
+ {
130
+ "epoch": 8.0,
131
+ "learning_rate": 4.2e-05,
132
+ "loss": 0.2007,
133
+ "step": 15672
134
+ },
135
+ {
136
+ "epoch": 8.0,
137
+ "eval_accuracy": 0.9926470588235294,
138
+ "eval_combined_score": 0.9936497326203209,
139
+ "eval_f1": 0.9946524064171123,
140
+ "eval_loss": 0.16275052726268768,
141
+ "eval_runtime": 0.7788,
142
+ "eval_samples_per_second": 523.893,
143
+ "eval_steps_per_second": 5.136,
144
+ "step": 15672
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "learning_rate": 4.1e-05,
149
+ "loss": 0.1985,
150
+ "step": 17631
151
+ },
152
+ {
153
+ "epoch": 9.0,
154
+ "eval_accuracy": 0.9852941176470589,
155
+ "eval_combined_score": 0.9873279098873593,
156
+ "eval_f1": 0.9893617021276596,
157
+ "eval_loss": 0.15478737652301788,
158
+ "eval_runtime": 0.7768,
159
+ "eval_samples_per_second": 525.26,
160
+ "eval_steps_per_second": 5.15,
161
+ "step": 17631
162
+ },
163
+ {
164
+ "epoch": 10.0,
165
+ "learning_rate": 4e-05,
166
+ "loss": 0.1965,
167
+ "step": 19590
168
+ },
169
+ {
170
+ "epoch": 10.0,
171
+ "eval_accuracy": 0.9877450980392157,
172
+ "eval_combined_score": 0.9894162210338681,
173
+ "eval_f1": 0.9910873440285204,
174
+ "eval_loss": 0.15827800333499908,
175
+ "eval_runtime": 0.7788,
176
+ "eval_samples_per_second": 523.897,
177
+ "eval_steps_per_second": 5.136,
178
+ "step": 19590
179
+ },
180
+ {
181
+ "epoch": 11.0,
182
+ "learning_rate": 3.9000000000000006e-05,
183
+ "loss": 0.195,
184
+ "step": 21549
185
+ },
186
+ {
187
+ "epoch": 11.0,
188
+ "eval_accuracy": 0.9901960784313726,
189
+ "eval_combined_score": 0.9915138098250054,
190
+ "eval_f1": 0.992831541218638,
191
+ "eval_loss": 0.1527128666639328,
192
+ "eval_runtime": 0.7814,
193
+ "eval_samples_per_second": 522.15,
194
+ "eval_steps_per_second": 5.119,
195
+ "step": 21549
196
+ },
197
+ {
198
+ "epoch": 12.0,
199
+ "learning_rate": 3.8e-05,
200
+ "loss": 0.1938,
201
+ "step": 23508
202
+ },
203
+ {
204
+ "epoch": 12.0,
205
+ "eval_accuracy": 0.9901960784313726,
206
+ "eval_combined_score": 0.9915266106442577,
207
+ "eval_f1": 0.9928571428571428,
208
+ "eval_loss": 0.15122443437576294,
209
+ "eval_runtime": 0.7775,
210
+ "eval_samples_per_second": 524.756,
211
+ "eval_steps_per_second": 5.145,
212
+ "step": 23508
213
+ },
214
+ {
215
+ "epoch": 13.0,
216
+ "learning_rate": 3.7e-05,
217
+ "loss": 0.1926,
218
+ "step": 25467
219
+ },
220
+ {
221
+ "epoch": 13.0,
222
+ "eval_accuracy": 0.9950980392156863,
223
+ "eval_combined_score": 0.9957633053221289,
224
+ "eval_f1": 0.9964285714285714,
225
+ "eval_loss": 0.1425897628068924,
226
+ "eval_runtime": 0.7755,
227
+ "eval_samples_per_second": 526.144,
228
+ "eval_steps_per_second": 5.158,
229
+ "step": 25467
230
+ },
231
+ {
232
+ "epoch": 14.0,
233
+ "learning_rate": 3.6e-05,
234
+ "loss": 0.1917,
235
+ "step": 27426
236
+ },
237
+ {
238
+ "epoch": 14.0,
239
+ "eval_accuracy": 0.9950980392156863,
240
+ "eval_combined_score": 0.9957633053221289,
241
+ "eval_f1": 0.9964285714285714,
242
+ "eval_loss": 0.14360927045345306,
243
+ "eval_runtime": 0.7791,
244
+ "eval_samples_per_second": 523.666,
245
+ "eval_steps_per_second": 5.134,
246
+ "step": 27426
247
+ },
248
+ {
249
+ "epoch": 15.0,
250
+ "learning_rate": 3.5e-05,
251
+ "loss": 0.191,
252
+ "step": 29385
253
+ },
254
+ {
255
+ "epoch": 15.0,
256
+ "eval_accuracy": 0.9926470588235294,
257
+ "eval_combined_score": 0.9936401662632853,
258
+ "eval_f1": 0.9946332737030412,
259
+ "eval_loss": 0.1502537876367569,
260
+ "eval_runtime": 0.7823,
261
+ "eval_samples_per_second": 521.529,
262
+ "eval_steps_per_second": 5.113,
263
+ "step": 29385
264
+ },
265
+ {
266
+ "epoch": 16.0,
267
+ "learning_rate": 3.4000000000000007e-05,
268
+ "loss": 0.1901,
269
+ "step": 31344
270
+ },
271
+ {
272
+ "epoch": 16.0,
273
+ "eval_accuracy": 0.9950980392156863,
274
+ "eval_combined_score": 0.9957633053221289,
275
+ "eval_f1": 0.9964285714285714,
276
+ "eval_loss": 0.14606404304504395,
277
+ "eval_runtime": 0.7757,
278
+ "eval_samples_per_second": 525.986,
279
+ "eval_steps_per_second": 5.157,
280
+ "step": 31344
281
+ },
282
+ {
283
+ "epoch": 17.0,
284
+ "learning_rate": 3.3e-05,
285
+ "loss": 0.1894,
286
+ "step": 33303
287
+ },
288
+ {
289
+ "epoch": 17.0,
290
+ "eval_accuracy": 0.9975490196078431,
291
+ "eval_combined_score": 0.9978800554210951,
292
+ "eval_f1": 0.998211091234347,
293
+ "eval_loss": 0.1498469114303589,
294
+ "eval_runtime": 0.7758,
295
+ "eval_samples_per_second": 525.884,
296
+ "eval_steps_per_second": 5.156,
297
+ "step": 33303
298
+ },
299
+ {
300
+ "epoch": 18.0,
301
+ "learning_rate": 3.2000000000000005e-05,
302
+ "loss": 0.1888,
303
+ "step": 35262
304
+ },
305
+ {
306
+ "epoch": 18.0,
307
+ "eval_accuracy": 0.9901960784313726,
308
+ "eval_combined_score": 0.9915266106442577,
309
+ "eval_f1": 0.9928571428571428,
310
+ "eval_loss": 0.14023421704769135,
311
+ "eval_runtime": 0.7766,
312
+ "eval_samples_per_second": 525.375,
313
+ "eval_steps_per_second": 5.151,
314
+ "step": 35262
315
+ },
316
+ {
317
+ "epoch": 19.0,
318
+ "learning_rate": 3.1e-05,
319
+ "loss": 0.1882,
320
+ "step": 37221
321
+ },
322
+ {
323
+ "epoch": 19.0,
324
+ "eval_accuracy": 0.9926470588235294,
325
+ "eval_combined_score": 0.9936401662632853,
326
+ "eval_f1": 0.9946332737030412,
327
+ "eval_loss": 0.14202910661697388,
328
+ "eval_runtime": 0.7794,
329
+ "eval_samples_per_second": 523.449,
330
+ "eval_steps_per_second": 5.132,
331
+ "step": 37221
332
+ },
333
+ {
334
+ "epoch": 20.0,
335
+ "learning_rate": 3e-05,
336
+ "loss": 0.1876,
337
+ "step": 39180
338
+ },
339
+ {
340
+ "epoch": 20.0,
341
+ "eval_accuracy": 0.9901960784313726,
342
+ "eval_combined_score": 0.9915393203544763,
343
+ "eval_f1": 0.9928825622775801,
344
+ "eval_loss": 0.13458134233951569,
345
+ "eval_runtime": 0.7781,
346
+ "eval_samples_per_second": 524.385,
347
+ "eval_steps_per_second": 5.141,
348
+ "step": 39180
349
+ },
350
+ {
351
+ "epoch": 21.0,
352
+ "learning_rate": 2.9e-05,
353
+ "loss": 0.1871,
354
+ "step": 41139
355
+ },
356
+ {
357
+ "epoch": 21.0,
358
+ "eval_accuracy": 0.9950980392156863,
359
+ "eval_combined_score": 0.9957633053221289,
360
+ "eval_f1": 0.9964285714285714,
361
+ "eval_loss": 0.13956362009048462,
362
+ "eval_runtime": 0.7768,
363
+ "eval_samples_per_second": 525.249,
364
+ "eval_steps_per_second": 5.15,
365
+ "step": 41139
366
+ },
367
+ {
368
+ "epoch": 22.0,
369
+ "learning_rate": 2.8000000000000003e-05,
370
+ "loss": 0.1867,
371
+ "step": 43098
372
+ },
373
+ {
374
+ "epoch": 22.0,
375
+ "eval_accuracy": 0.9950980392156863,
376
+ "eval_combined_score": 0.9957633053221289,
377
+ "eval_f1": 0.9964285714285714,
378
+ "eval_loss": 0.14428994059562683,
379
+ "eval_runtime": 0.7783,
380
+ "eval_samples_per_second": 524.192,
381
+ "eval_steps_per_second": 5.139,
382
+ "step": 43098
383
+ },
384
+ {
385
+ "epoch": 23.0,
386
+ "learning_rate": 2.7000000000000002e-05,
387
+ "loss": 0.1862,
388
+ "step": 45057
389
+ },
390
+ {
391
+ "epoch": 23.0,
392
+ "eval_accuracy": 0.9926470588235294,
393
+ "eval_combined_score": 0.9936497326203209,
394
+ "eval_f1": 0.9946524064171123,
395
+ "eval_loss": 0.1346110701560974,
396
+ "eval_runtime": 0.7827,
397
+ "eval_samples_per_second": 521.253,
398
+ "eval_steps_per_second": 5.11,
399
+ "step": 45057
400
+ },
401
+ {
402
+ "epoch": 24.0,
403
+ "learning_rate": 2.6000000000000002e-05,
404
+ "loss": 0.1857,
405
+ "step": 47016
406
+ },
407
+ {
408
+ "epoch": 24.0,
409
+ "eval_accuracy": 0.9950980392156863,
410
+ "eval_combined_score": 0.9957633053221289,
411
+ "eval_f1": 0.9964285714285714,
412
+ "eval_loss": 0.13611076772212982,
413
+ "eval_runtime": 0.7925,
414
+ "eval_samples_per_second": 514.847,
415
+ "eval_steps_per_second": 5.048,
416
+ "step": 47016
417
+ },
418
+ {
419
+ "epoch": 25.0,
420
+ "learning_rate": 2.5e-05,
421
+ "loss": 0.1854,
422
+ "step": 48975
423
+ },
424
+ {
425
+ "epoch": 25.0,
426
+ "eval_accuracy": 0.9926470588235294,
427
+ "eval_combined_score": 0.9936497326203209,
428
+ "eval_f1": 0.9946524064171123,
429
+ "eval_loss": 0.13179393112659454,
430
+ "eval_runtime": 0.7822,
431
+ "eval_samples_per_second": 521.594,
432
+ "eval_steps_per_second": 5.114,
433
+ "step": 48975
434
+ },
435
+ {
436
+ "epoch": 26.0,
437
+ "learning_rate": 2.4e-05,
438
+ "loss": 0.185,
439
+ "step": 50934
440
+ },
441
+ {
442
+ "epoch": 26.0,
443
+ "eval_accuracy": 0.9901960784313726,
444
+ "eval_combined_score": 0.9915393203544763,
445
+ "eval_f1": 0.9928825622775801,
446
+ "eval_loss": 0.13099727034568787,
447
+ "eval_runtime": 0.851,
448
+ "eval_samples_per_second": 479.464,
449
+ "eval_steps_per_second": 4.701,
450
+ "step": 50934
451
+ },
452
+ {
453
+ "epoch": 27.0,
454
+ "learning_rate": 2.3000000000000003e-05,
455
+ "loss": 0.1846,
456
+ "step": 52893
457
+ },
458
+ {
459
+ "epoch": 27.0,
460
+ "eval_accuracy": 0.9926470588235294,
461
+ "eval_combined_score": 0.9936497326203209,
462
+ "eval_f1": 0.9946524064171123,
463
+ "eval_loss": 0.13022534549236298,
464
+ "eval_runtime": 0.7818,
465
+ "eval_samples_per_second": 521.85,
466
+ "eval_steps_per_second": 5.116,
467
+ "step": 52893
468
+ },
469
+ {
470
+ "epoch": 28.0,
471
+ "learning_rate": 2.2000000000000003e-05,
472
+ "loss": 0.1842,
473
+ "step": 54852
474
+ },
475
+ {
476
+ "epoch": 28.0,
477
+ "eval_accuracy": 0.9950980392156863,
478
+ "eval_combined_score": 0.9957633053221289,
479
+ "eval_f1": 0.9964285714285714,
480
+ "eval_loss": 0.13285598158836365,
481
+ "eval_runtime": 0.7818,
482
+ "eval_samples_per_second": 521.866,
483
+ "eval_steps_per_second": 5.116,
484
+ "step": 54852
485
+ },
486
+ {
487
+ "epoch": 29.0,
488
+ "learning_rate": 2.1e-05,
489
+ "loss": 0.1839,
490
+ "step": 56811
491
+ },
492
+ {
493
+ "epoch": 29.0,
494
+ "eval_accuracy": 0.9901960784313726,
495
+ "eval_combined_score": 0.9915393203544763,
496
+ "eval_f1": 0.9928825622775801,
497
+ "eval_loss": 0.13004697859287262,
498
+ "eval_runtime": 0.7761,
499
+ "eval_samples_per_second": 525.714,
500
+ "eval_steps_per_second": 5.154,
501
+ "step": 56811
502
+ },
503
+ {
504
+ "epoch": 30.0,
505
+ "learning_rate": 2e-05,
506
+ "loss": 0.1836,
507
+ "step": 58770
508
+ },
509
+ {
510
+ "epoch": 30.0,
511
+ "eval_accuracy": 0.9901960784313726,
512
+ "eval_combined_score": 0.9915393203544763,
513
+ "eval_f1": 0.9928825622775801,
514
+ "eval_loss": 0.1328008770942688,
515
+ "eval_runtime": 0.7782,
516
+ "eval_samples_per_second": 524.253,
517
+ "eval_steps_per_second": 5.14,
518
+ "step": 58770
519
+ },
520
+ {
521
+ "epoch": 31.0,
522
+ "learning_rate": 1.9e-05,
523
+ "loss": 0.1832,
524
+ "step": 60729
525
+ },
526
+ {
527
+ "epoch": 31.0,
528
+ "eval_accuracy": 0.9901960784313726,
529
+ "eval_combined_score": 0.9915393203544763,
530
+ "eval_f1": 0.9928825622775801,
531
+ "eval_loss": 0.1326626092195511,
532
+ "eval_runtime": 0.7751,
533
+ "eval_samples_per_second": 526.407,
534
+ "eval_steps_per_second": 5.161,
535
+ "step": 60729
536
+ },
537
+ {
538
+ "epoch": 32.0,
539
+ "learning_rate": 1.8e-05,
540
+ "loss": 0.1829,
541
+ "step": 62688
542
+ },
543
+ {
544
+ "epoch": 32.0,
545
+ "eval_accuracy": 0.9901960784313726,
546
+ "eval_combined_score": 0.9915393203544763,
547
+ "eval_f1": 0.9928825622775801,
548
+ "eval_loss": 0.13079187273979187,
549
+ "eval_runtime": 0.7779,
550
+ "eval_samples_per_second": 524.516,
551
+ "eval_steps_per_second": 5.142,
552
+ "step": 62688
553
+ },
554
+ {
555
+ "epoch": 33.0,
556
+ "learning_rate": 1.7000000000000003e-05,
557
+ "loss": 0.1826,
558
+ "step": 64647
559
+ },
560
+ {
561
+ "epoch": 33.0,
562
+ "eval_accuracy": 0.9901960784313726,
563
+ "eval_combined_score": 0.9915393203544763,
564
+ "eval_f1": 0.9928825622775801,
565
+ "eval_loss": 0.12873521447181702,
566
+ "eval_runtime": 0.7782,
567
+ "eval_samples_per_second": 524.295,
568
+ "eval_steps_per_second": 5.14,
569
+ "step": 64647
570
+ },
571
+ {
572
+ "epoch": 34.0,
573
+ "learning_rate": 1.6000000000000003e-05,
574
+ "loss": 0.1824,
575
+ "step": 66606
576
+ },
577
+ {
578
+ "epoch": 34.0,
579
+ "eval_accuracy": 0.9926470588235294,
580
+ "eval_combined_score": 0.9936497326203209,
581
+ "eval_f1": 0.9946524064171123,
582
+ "eval_loss": 0.1308642029762268,
583
+ "eval_runtime": 0.7782,
584
+ "eval_samples_per_second": 524.283,
585
+ "eval_steps_per_second": 5.14,
586
+ "step": 66606
587
+ },
588
+ {
589
+ "epoch": 35.0,
590
+ "learning_rate": 1.5e-05,
591
+ "loss": 0.1821,
592
+ "step": 68565
593
+ },
594
+ {
595
+ "epoch": 35.0,
596
+ "eval_accuracy": 0.9926470588235294,
597
+ "eval_combined_score": 0.9936497326203209,
598
+ "eval_f1": 0.9946524064171123,
599
+ "eval_loss": 0.13088451325893402,
600
+ "eval_runtime": 0.7758,
601
+ "eval_samples_per_second": 525.926,
602
+ "eval_steps_per_second": 5.156,
603
+ "step": 68565
604
+ },
605
+ {
606
+ "epoch": 36.0,
607
+ "learning_rate": 1.4000000000000001e-05,
608
+ "loss": 0.1818,
609
+ "step": 70524
610
+ },
611
+ {
612
+ "epoch": 36.0,
613
+ "eval_accuracy": 0.9901960784313726,
614
+ "eval_combined_score": 0.9915393203544763,
615
+ "eval_f1": 0.9928825622775801,
616
+ "eval_loss": 0.1271485686302185,
617
+ "eval_runtime": 0.7809,
618
+ "eval_samples_per_second": 522.489,
619
+ "eval_steps_per_second": 5.122,
620
+ "step": 70524
621
+ },
622
+ {
623
+ "epoch": 37.0,
624
+ "learning_rate": 1.3000000000000001e-05,
625
+ "loss": 0.1816,
626
+ "step": 72483
627
+ },
628
+ {
629
+ "epoch": 37.0,
630
+ "eval_accuracy": 0.9877450980392157,
631
+ "eval_combined_score": 0.9894320516839064,
632
+ "eval_f1": 0.9911190053285969,
633
+ "eval_loss": 0.127828910946846,
634
+ "eval_runtime": 0.7755,
635
+ "eval_samples_per_second": 526.078,
636
+ "eval_steps_per_second": 5.158,
637
+ "step": 72483
638
+ },
639
+ {
640
+ "epoch": 38.0,
641
+ "learning_rate": 1.2e-05,
642
+ "loss": 0.1813,
643
+ "step": 74442
644
+ },
645
+ {
646
+ "epoch": 38.0,
647
+ "eval_accuracy": 0.9901960784313726,
648
+ "eval_combined_score": 0.9915393203544763,
649
+ "eval_f1": 0.9928825622775801,
650
+ "eval_loss": 0.1280445158481598,
651
+ "eval_runtime": 0.7777,
652
+ "eval_samples_per_second": 524.647,
653
+ "eval_steps_per_second": 5.144,
654
+ "step": 74442
655
+ },
656
+ {
657
+ "epoch": 39.0,
658
+ "learning_rate": 1.1000000000000001e-05,
659
+ "loss": 0.1811,
660
+ "step": 76401
661
+ },
662
+ {
663
+ "epoch": 39.0,
664
+ "eval_accuracy": 0.9901960784313726,
665
+ "eval_combined_score": 0.9915393203544763,
666
+ "eval_f1": 0.9928825622775801,
667
+ "eval_loss": 0.12891501188278198,
668
+ "eval_runtime": 0.7798,
669
+ "eval_samples_per_second": 523.193,
670
+ "eval_steps_per_second": 5.129,
671
+ "step": 76401
672
+ },
673
+ {
674
+ "epoch": 40.0,
675
+ "learning_rate": 1e-05,
676
+ "loss": 0.1809,
677
+ "step": 78360
678
+ },
679
+ {
680
+ "epoch": 40.0,
681
+ "eval_accuracy": 0.9877450980392157,
682
+ "eval_combined_score": 0.9894320516839064,
683
+ "eval_f1": 0.9911190053285969,
684
+ "eval_loss": 0.12895023822784424,
685
+ "eval_runtime": 0.7761,
686
+ "eval_samples_per_second": 525.732,
687
+ "eval_steps_per_second": 5.154,
688
+ "step": 78360
689
+ },
690
+ {
691
+ "epoch": 41.0,
692
+ "learning_rate": 9e-06,
693
+ "loss": 0.1807,
694
+ "step": 80319
695
+ },
696
+ {
697
+ "epoch": 41.0,
698
+ "eval_accuracy": 0.9877450980392157,
699
+ "eval_combined_score": 0.9894320516839064,
700
+ "eval_f1": 0.9911190053285969,
701
+ "eval_loss": 0.1256455034017563,
702
+ "eval_runtime": 0.7742,
703
+ "eval_samples_per_second": 526.998,
704
+ "eval_steps_per_second": 5.167,
705
+ "step": 80319
706
+ },
707
+ {
708
+ "epoch": 42.0,
709
+ "learning_rate": 8.000000000000001e-06,
710
+ "loss": 0.1805,
711
+ "step": 82278
712
+ },
713
+ {
714
+ "epoch": 42.0,
715
+ "eval_accuracy": 0.9926470588235294,
716
+ "eval_combined_score": 0.9936497326203209,
717
+ "eval_f1": 0.9946524064171123,
718
+ "eval_loss": 0.12676292657852173,
719
+ "eval_runtime": 0.7808,
720
+ "eval_samples_per_second": 522.525,
721
+ "eval_steps_per_second": 5.123,
722
+ "step": 82278
723
+ },
724
+ {
725
+ "epoch": 43.0,
726
+ "learning_rate": 7.000000000000001e-06,
727
+ "loss": 0.1803,
728
+ "step": 84237
729
+ },
730
+ {
731
+ "epoch": 43.0,
732
+ "eval_accuracy": 0.9926470588235294,
733
+ "eval_combined_score": 0.9936497326203209,
734
+ "eval_f1": 0.9946524064171123,
735
+ "eval_loss": 0.12742425501346588,
736
+ "eval_runtime": 0.7744,
737
+ "eval_samples_per_second": 526.85,
738
+ "eval_steps_per_second": 5.165,
739
+ "step": 84237
740
+ },
741
+ {
742
+ "epoch": 44.0,
743
+ "learning_rate": 6e-06,
744
+ "loss": 0.1801,
745
+ "step": 86196
746
+ },
747
+ {
748
+ "epoch": 44.0,
749
+ "eval_accuracy": 0.9926470588235294,
750
+ "eval_combined_score": 0.9936497326203209,
751
+ "eval_f1": 0.9946524064171123,
752
+ "eval_loss": 0.12773118913173676,
753
+ "eval_runtime": 0.7768,
754
+ "eval_samples_per_second": 525.233,
755
+ "eval_steps_per_second": 5.149,
756
+ "step": 86196
757
+ },
758
+ {
759
+ "epoch": 45.0,
760
+ "learning_rate": 5e-06,
761
+ "loss": 0.1799,
762
+ "step": 88155
763
+ },
764
+ {
765
+ "epoch": 45.0,
766
+ "eval_accuracy": 0.9926470588235294,
767
+ "eval_combined_score": 0.9936497326203209,
768
+ "eval_f1": 0.9946524064171123,
769
+ "eval_loss": 0.12637551128864288,
770
+ "eval_runtime": 0.7756,
771
+ "eval_samples_per_second": 526.076,
772
+ "eval_steps_per_second": 5.158,
773
+ "step": 88155
774
+ },
775
+ {
776
+ "epoch": 46.0,
777
+ "learning_rate": 4.000000000000001e-06,
778
+ "loss": 0.1797,
779
+ "step": 90114
780
+ },
781
+ {
782
+ "epoch": 46.0,
783
+ "eval_accuracy": 0.9901960784313726,
784
+ "eval_combined_score": 0.9915393203544763,
785
+ "eval_f1": 0.9928825622775801,
786
+ "eval_loss": 0.12735696136951447,
787
+ "eval_runtime": 0.7805,
788
+ "eval_samples_per_second": 522.735,
789
+ "eval_steps_per_second": 5.125,
790
+ "step": 90114
791
+ },
792
+ {
793
+ "epoch": 46.0,
794
+ "step": 90114,
795
+ "total_flos": 3.6163552369495245e+17,
796
+ "train_loss": 0.19228331322046327,
797
+ "train_runtime": 51035.534,
798
+ "train_samples_per_second": 245.648,
799
+ "train_steps_per_second": 1.919
800
+ }
801
+ ],
802
+ "max_steps": 97950,
803
+ "num_train_epochs": 50,
804
+ "total_flos": 3.6163552369495245e+17,
805
+ "trial_name": null,
806
+ "trial_params": null
807
+ }