hocheewai committed on
Commit
dc84830
1 Parent(s): 934f7b1

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.9096774193548387,
4
- "eval_loss": 0.27532899379730225,
5
- "eval_runtime": 44.6134,
6
- "eval_samples_per_second": 3.474,
7
- "eval_steps_per_second": 1.748
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8284023668639053,
4
+ "eval_loss": 0.45989519357681274,
5
+ "eval_runtime": 94.1469,
6
+ "eval_samples_per_second": 1.795,
7
+ "eval_steps_per_second": 0.903
8
  }
runs/May11_18-07-17_7ffe87b6d6e8/events.out.tfevents.1683829879.7ffe87b6d6e8.6406.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a89529d8cbce5e82369fc4f7c1a80fb8967a944339aeb5e1f9317c696f140fcd
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432986d9b7a1a03f4af6f7b7c987efb96e6b53b13898824fdd0674920286b1ce
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.9096774193548387,
4
- "eval_loss": 0.27532899379730225,
5
- "eval_runtime": 44.6134,
6
- "eval_samples_per_second": 3.474,
7
- "eval_steps_per_second": 1.748
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8284023668639053,
4
+ "eval_loss": 0.45989519357681274,
5
+ "eval_runtime": 94.1469,
6
+ "eval_samples_per_second": 1.795,
7
+ "eval_steps_per_second": 0.903
8
  }
trainer_state.json CHANGED
@@ -1,439 +1,439 @@
1
  {
2
- "best_metric": 0.9,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-600",
4
  "epoch": 3.25,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
- "learning_rate": 8.333333333333334e-06,
13
- "loss": 2.3492,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
- "learning_rate": 1.6666666666666667e-05,
19
- "loss": 2.3466,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
- "learning_rate": 2.5e-05,
25
- "loss": 2.3064,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
- "learning_rate": 3.3333333333333335e-05,
31
- "loss": 2.1908,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.08,
36
- "learning_rate": 4.166666666666667e-05,
37
- "loss": 2.1392,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
- "learning_rate": 5e-05,
43
- "loss": 2.207,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.12,
48
- "learning_rate": 4.9074074074074075e-05,
49
- "loss": 2.1197,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.13,
54
- "learning_rate": 4.814814814814815e-05,
55
- "loss": 1.7111,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.15,
60
- "learning_rate": 4.722222222222222e-05,
61
- "loss": 1.7897,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.17,
66
- "learning_rate": 4.62962962962963e-05,
67
- "loss": 1.7259,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.18,
72
- "learning_rate": 4.5370370370370374e-05,
73
- "loss": 1.491,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.2,
78
- "learning_rate": 4.4444444444444447e-05,
79
- "loss": 1.6251,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.22,
84
- "learning_rate": 4.351851851851852e-05,
85
- "loss": 1.5884,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.23,
90
- "learning_rate": 4.259259259259259e-05,
91
- "loss": 1.3184,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.25,
96
- "learning_rate": 4.166666666666667e-05,
97
- "loss": 1.5351,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.25,
102
- "eval_accuracy": 0.5285714285714286,
103
- "eval_loss": 1.7103750705718994,
104
- "eval_runtime": 20.2519,
105
- "eval_samples_per_second": 3.456,
106
- "eval_steps_per_second": 1.728,
107
- "step": 150
108
  },
109
  {
110
- "epoch": 1.02,
111
- "learning_rate": 4.074074074074074e-05,
112
- "loss": 1.3053,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.03,
117
- "learning_rate": 3.981481481481482e-05,
118
- "loss": 1.1629,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.05,
123
- "learning_rate": 3.888888888888889e-05,
124
- "loss": 0.9645,
125
  "step": 180
126
  },
127
  {
128
- "epoch": 1.07,
129
- "learning_rate": 3.7962962962962964e-05,
130
- "loss": 0.7846,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.08,
135
- "learning_rate": 3.7037037037037037e-05,
136
- "loss": 0.9741,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.1,
141
- "learning_rate": 3.611111111111111e-05,
142
- "loss": 0.6873,
143
  "step": 210
144
  },
145
  {
146
- "epoch": 1.12,
147
- "learning_rate": 3.518518518518519e-05,
148
- "loss": 0.9336,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.13,
153
- "learning_rate": 3.425925925925926e-05,
154
- "loss": 0.8429,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.15,
159
- "learning_rate": 3.3333333333333335e-05,
160
- "loss": 0.7939,
161
  "step": 240
162
  },
163
  {
164
- "epoch": 1.17,
165
- "learning_rate": 3.240740740740741e-05,
166
- "loss": 0.6433,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.18,
171
- "learning_rate": 3.148148148148148e-05,
172
- "loss": 0.6427,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.2,
177
- "learning_rate": 3.055555555555556e-05,
178
- "loss": 0.8235,
179
  "step": 270
180
  },
181
  {
182
- "epoch": 1.22,
183
- "learning_rate": 2.962962962962963e-05,
184
- "loss": 0.3465,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.23,
189
- "learning_rate": 2.8703703703703706e-05,
190
- "loss": 0.77,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.25,
195
- "learning_rate": 2.777777777777778e-05,
196
- "loss": 0.8621,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.25,
201
- "eval_accuracy": 0.7428571428571429,
202
- "eval_loss": 0.7072036266326904,
203
- "eval_runtime": 19.9162,
204
- "eval_samples_per_second": 3.515,
205
- "eval_steps_per_second": 1.757,
206
- "step": 300
207
  },
208
  {
209
- "epoch": 2.02,
210
- "learning_rate": 2.6851851851851855e-05,
211
- "loss": 0.309,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.03,
216
- "learning_rate": 2.5925925925925925e-05,
217
- "loss": 0.1178,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.05,
222
- "learning_rate": 2.5e-05,
223
- "loss": 0.3755,
224
  "step": 330
225
  },
226
  {
227
- "epoch": 2.07,
228
- "learning_rate": 2.4074074074074074e-05,
229
- "loss": 0.2543,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.08,
234
- "learning_rate": 2.314814814814815e-05,
235
- "loss": 0.2794,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.1,
240
- "learning_rate": 2.2222222222222223e-05,
241
- "loss": 0.3993,
242
  "step": 360
243
  },
244
  {
245
- "epoch": 2.12,
246
- "learning_rate": 2.1296296296296296e-05,
247
- "loss": 0.1797,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.13,
252
- "learning_rate": 2.037037037037037e-05,
253
- "loss": 0.2349,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.15,
258
- "learning_rate": 1.9444444444444445e-05,
259
- "loss": 0.2238,
260
  "step": 390
261
  },
262
  {
263
- "epoch": 2.17,
264
- "learning_rate": 1.8518518518518518e-05,
265
- "loss": 0.3756,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.18,
270
- "learning_rate": 1.7592592592592595e-05,
271
- "loss": 0.1291,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.2,
276
- "learning_rate": 1.6666666666666667e-05,
277
- "loss": 0.2602,
278
  "step": 420
279
  },
280
  {
281
- "epoch": 2.22,
282
- "learning_rate": 1.574074074074074e-05,
283
- "loss": 0.3749,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.23,
288
- "learning_rate": 1.4814814814814815e-05,
289
- "loss": 0.2041,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.25,
294
- "learning_rate": 1.388888888888889e-05,
295
- "loss": 0.2002,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.25,
300
- "eval_accuracy": 0.8285714285714286,
301
- "eval_loss": 0.5905740261077881,
302
- "eval_runtime": 19.9694,
303
- "eval_samples_per_second": 3.505,
304
- "eval_steps_per_second": 1.753,
305
- "step": 450
306
  },
307
  {
308
- "epoch": 3.02,
309
- "learning_rate": 1.2962962962962962e-05,
310
- "loss": 0.4304,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.03,
315
- "learning_rate": 1.2037037037037037e-05,
316
- "loss": 0.1548,
317
  "step": 470
318
  },
319
  {
320
- "epoch": 3.05,
321
- "learning_rate": 1.1111111111111112e-05,
322
- "loss": 0.0379,
323
  "step": 480
324
  },
325
  {
326
- "epoch": 3.07,
327
- "learning_rate": 1.0185185185185185e-05,
328
- "loss": 0.0551,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.08,
333
- "learning_rate": 9.259259259259259e-06,
334
- "loss": 0.0582,
335
  "step": 500
336
  },
337
  {
338
- "epoch": 3.1,
339
- "learning_rate": 8.333333333333334e-06,
340
- "loss": 0.1102,
341
  "step": 510
342
  },
343
  {
344
- "epoch": 3.12,
345
- "learning_rate": 7.4074074074074075e-06,
346
- "loss": 0.1212,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 3.13,
351
- "learning_rate": 6.481481481481481e-06,
352
- "loss": 0.136,
353
  "step": 530
354
  },
355
  {
356
- "epoch": 3.15,
357
- "learning_rate": 5.555555555555556e-06,
358
- "loss": 0.133,
359
  "step": 540
360
  },
361
  {
362
- "epoch": 3.17,
363
- "learning_rate": 4.6296296296296296e-06,
364
- "loss": 0.2359,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.18,
369
- "learning_rate": 3.7037037037037037e-06,
370
- "loss": 0.2602,
371
  "step": 560
372
  },
373
  {
374
- "epoch": 3.2,
375
- "learning_rate": 2.777777777777778e-06,
376
- "loss": 0.1343,
377
  "step": 570
378
  },
379
  {
380
- "epoch": 3.22,
381
- "learning_rate": 1.8518518518518519e-06,
382
- "loss": 0.137,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.23,
387
- "learning_rate": 9.259259259259259e-07,
388
- "loss": 0.2331,
389
  "step": 590
390
  },
391
  {
392
- "epoch": 3.25,
393
- "learning_rate": 0.0,
394
- "loss": 0.4786,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.25,
399
- "eval_accuracy": 0.9,
400
- "eval_loss": 0.285513311624527,
401
- "eval_runtime": 19.8769,
402
- "eval_samples_per_second": 3.522,
403
- "eval_steps_per_second": 1.761,
404
- "step": 600
405
  },
406
  {
407
  "epoch": 3.25,
408
- "step": 600,
409
- "total_flos": 1.495384188125184e+18,
410
- "train_loss": 0.7935705941418807,
411
- "train_runtime": 854.0599,
412
- "train_samples_per_second": 1.405,
413
- "train_steps_per_second": 0.703
414
  },
415
  {
416
  "epoch": 3.25,
417
- "eval_accuracy": 0.9096774193548387,
418
- "eval_loss": 0.27532899379730225,
419
- "eval_runtime": 46.5951,
420
- "eval_samples_per_second": 3.327,
421
- "eval_steps_per_second": 1.674,
422
- "step": 600
423
  },
424
  {
425
  "epoch": 3.25,
426
- "eval_accuracy": 0.9096774193548387,
427
- "eval_loss": 0.27532899379730225,
428
- "eval_runtime": 44.6134,
429
- "eval_samples_per_second": 3.474,
430
- "eval_steps_per_second": 1.748,
431
- "step": 600
432
  }
433
  ],
434
- "max_steps": 600,
435
  "num_train_epochs": 9223372036854775807,
436
- "total_flos": 1.495384188125184e+18,
437
  "trial_name": null,
438
  "trial_params": null
439
  }
 
1
  {
2
+ "best_metric": 0.7738095238095238,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-604",
4
  "epoch": 3.25,
5
+ "global_step": 604,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
+ "learning_rate": 8.196721311475409e-06,
13
+ "loss": 2.4329,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
+ "learning_rate": 1.6393442622950818e-05,
19
+ "loss": 2.4383,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
+ "learning_rate": 2.459016393442623e-05,
25
+ "loss": 2.428,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
+ "learning_rate": 3.2786885245901635e-05,
31
+ "loss": 2.3514,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.08,
36
+ "learning_rate": 4.098360655737705e-05,
37
+ "loss": 2.4083,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
+ "learning_rate": 4.918032786885246e-05,
43
+ "loss": 2.2609,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.12,
48
+ "learning_rate": 4.9171270718232046e-05,
49
+ "loss": 2.1355,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.13,
54
+ "learning_rate": 4.825046040515654e-05,
55
+ "loss": 2.0781,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.15,
60
+ "learning_rate": 4.732965009208104e-05,
61
+ "loss": 1.686,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.17,
66
+ "learning_rate": 4.6408839779005524e-05,
67
+ "loss": 1.5192,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.18,
72
+ "learning_rate": 4.5488029465930024e-05,
73
+ "loss": 1.7072,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.2,
78
+ "learning_rate": 4.4567219152854516e-05,
79
+ "loss": 1.93,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.22,
84
+ "learning_rate": 4.364640883977901e-05,
85
+ "loss": 1.4418,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.23,
90
+ "learning_rate": 4.27255985267035e-05,
91
+ "loss": 1.6683,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.25,
96
+ "learning_rate": 4.1804788213627995e-05,
97
+ "loss": 1.3861,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.25,
102
+ "eval_accuracy": 0.40476190476190477,
103
+ "eval_loss": 1.518173336982727,
104
+ "eval_runtime": 74.2638,
105
+ "eval_samples_per_second": 1.131,
106
+ "eval_steps_per_second": 0.566,
107
+ "step": 151
108
  },
109
  {
110
+ "epoch": 1.01,
111
+ "learning_rate": 4.088397790055249e-05,
112
+ "loss": 1.1002,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.03,
117
+ "learning_rate": 3.996316758747698e-05,
118
+ "loss": 1.0042,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.05,
123
+ "learning_rate": 3.904235727440147e-05,
124
+ "loss": 0.6699,
125
  "step": 180
126
  },
127
  {
128
+ "epoch": 1.06,
129
+ "learning_rate": 3.812154696132597e-05,
130
+ "loss": 0.7446,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.08,
135
+ "learning_rate": 3.720073664825046e-05,
136
+ "loss": 0.575,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.1,
141
+ "learning_rate": 3.627992633517496e-05,
142
+ "loss": 1.0777,
143
  "step": 210
144
  },
145
  {
146
+ "epoch": 1.11,
147
+ "learning_rate": 3.535911602209945e-05,
148
+ "loss": 0.8046,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.13,
153
+ "learning_rate": 3.4438305709023944e-05,
154
+ "loss": 1.3932,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.15,
159
+ "learning_rate": 3.3517495395948436e-05,
160
+ "loss": 1.0903,
161
  "step": 240
162
  },
163
  {
164
+ "epoch": 1.16,
165
+ "learning_rate": 3.259668508287293e-05,
166
+ "loss": 0.9612,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.18,
171
+ "learning_rate": 3.167587476979743e-05,
172
+ "loss": 1.0058,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.2,
177
+ "learning_rate": 3.0755064456721915e-05,
178
+ "loss": 1.0088,
179
  "step": 270
180
  },
181
  {
182
+ "epoch": 1.21,
183
+ "learning_rate": 2.983425414364641e-05,
184
+ "loss": 0.5327,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.23,
189
+ "learning_rate": 2.8913443830570904e-05,
190
+ "loss": 0.5463,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.25,
195
+ "learning_rate": 2.7992633517495396e-05,
196
+ "loss": 0.6672,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.25,
201
+ "eval_accuracy": 0.7023809523809523,
202
+ "eval_loss": 0.9395186901092529,
203
+ "eval_runtime": 73.8401,
204
+ "eval_samples_per_second": 1.138,
205
+ "eval_steps_per_second": 0.569,
206
+ "step": 302
207
  },
208
  {
209
+ "epoch": 2.01,
210
+ "learning_rate": 2.707182320441989e-05,
211
+ "loss": 0.1746,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.03,
216
+ "learning_rate": 2.6151012891344385e-05,
217
+ "loss": 0.2385,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.05,
222
+ "learning_rate": 2.523020257826888e-05,
223
+ "loss": 0.7082,
224
  "step": 330
225
  },
226
  {
227
+ "epoch": 2.06,
228
+ "learning_rate": 2.430939226519337e-05,
229
+ "loss": 0.4966,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.08,
234
+ "learning_rate": 2.3388581952117867e-05,
235
+ "loss": 0.74,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.1,
240
+ "learning_rate": 2.246777163904236e-05,
241
+ "loss": 0.5615,
242
  "step": 360
243
  },
244
  {
245
+ "epoch": 2.11,
246
+ "learning_rate": 2.1546961325966852e-05,
247
+ "loss": 0.406,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.13,
252
+ "learning_rate": 2.0626151012891345e-05,
253
+ "loss": 0.2123,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.15,
258
+ "learning_rate": 1.9705340699815838e-05,
259
+ "loss": 0.0971,
260
  "step": 390
261
  },
262
  {
263
+ "epoch": 2.16,
264
+ "learning_rate": 1.878453038674033e-05,
265
+ "loss": 0.2095,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.18,
270
+ "learning_rate": 1.7863720073664823e-05,
271
+ "loss": 0.0643,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.2,
276
+ "learning_rate": 1.694290976058932e-05,
277
+ "loss": 0.1121,
278
  "step": 420
279
  },
280
  {
281
+ "epoch": 2.21,
282
+ "learning_rate": 1.6022099447513812e-05,
283
+ "loss": 0.4411,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.23,
288
+ "learning_rate": 1.5101289134438307e-05,
289
+ "loss": 0.2766,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.25,
294
+ "learning_rate": 1.41804788213628e-05,
295
+ "loss": 0.1478,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.25,
300
+ "eval_accuracy": 0.7380952380952381,
301
+ "eval_loss": 0.7313230633735657,
302
+ "eval_runtime": 75.0778,
303
+ "eval_samples_per_second": 1.119,
304
+ "eval_steps_per_second": 0.559,
305
+ "step": 453
306
  },
307
  {
308
+ "epoch": 3.01,
309
+ "learning_rate": 1.3259668508287292e-05,
310
+ "loss": 0.2004,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.03,
315
+ "learning_rate": 1.2338858195211787e-05,
316
+ "loss": 0.0466,
317
  "step": 470
318
  },
319
  {
320
+ "epoch": 3.04,
321
+ "learning_rate": 1.1418047882136281e-05,
322
+ "loss": 0.2506,
323
  "step": 480
324
  },
325
  {
326
+ "epoch": 3.06,
327
+ "learning_rate": 1.0497237569060774e-05,
328
+ "loss": 0.2664,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.08,
333
+ "learning_rate": 9.576427255985269e-06,
334
+ "loss": 0.3079,
335
  "step": 500
336
  },
337
  {
338
+ "epoch": 3.09,
339
+ "learning_rate": 8.655616942909761e-06,
340
+ "loss": 0.3008,
341
  "step": 510
342
  },
343
  {
344
+ "epoch": 3.11,
345
+ "learning_rate": 7.734806629834254e-06,
346
+ "loss": 0.1139,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 3.13,
351
+ "learning_rate": 6.8139963167587485e-06,
352
+ "loss": 0.271,
353
  "step": 530
354
  },
355
  {
356
+ "epoch": 3.14,
357
+ "learning_rate": 5.893186003683242e-06,
358
+ "loss": 0.104,
359
  "step": 540
360
  },
361
  {
362
+ "epoch": 3.16,
363
+ "learning_rate": 4.972375690607735e-06,
364
+ "loss": 0.191,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.18,
369
+ "learning_rate": 4.0515653775322285e-06,
370
+ "loss": 0.1103,
371
  "step": 560
372
  },
373
  {
374
+ "epoch": 3.19,
375
+ "learning_rate": 3.1307550644567225e-06,
376
+ "loss": 0.1846,
377
  "step": 570
378
  },
379
  {
380
+ "epoch": 3.21,
381
+ "learning_rate": 2.2099447513812157e-06,
382
+ "loss": 0.1183,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.23,
387
+ "learning_rate": 1.289134438305709e-06,
388
+ "loss": 0.0612,
389
  "step": 590
390
  },
391
  {
392
+ "epoch": 3.24,
393
+ "learning_rate": 3.6832412523020263e-07,
394
+ "loss": 0.2596,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.25,
399
+ "eval_accuracy": 0.7738095238095238,
400
+ "eval_loss": 0.543230414390564,
401
+ "eval_runtime": 74.0438,
402
+ "eval_samples_per_second": 1.134,
403
+ "eval_steps_per_second": 0.567,
404
+ "step": 604
405
  },
406
  {
407
  "epoch": 3.25,
408
+ "step": 604,
409
+ "total_flos": 1.5053668400415375e+18,
410
+ "train_loss": 0.8410116481958636,
411
+ "train_runtime": 1073.1012,
412
+ "train_samples_per_second": 1.126,
413
+ "train_steps_per_second": 0.563
414
  },
415
  {
416
  "epoch": 3.25,
417
+ "eval_accuracy": 0.8284023668639053,
418
+ "eval_loss": 0.4598952829837799,
419
+ "eval_runtime": 100.3611,
420
+ "eval_samples_per_second": 1.684,
421
+ "eval_steps_per_second": 0.847,
422
+ "step": 604
423
  },
424
  {
425
  "epoch": 3.25,
426
+ "eval_accuracy": 0.8284023668639053,
427
+ "eval_loss": 0.45989519357681274,
428
+ "eval_runtime": 94.1469,
429
+ "eval_samples_per_second": 1.795,
430
+ "eval_steps_per_second": 0.903,
431
+ "step": 604
432
  }
433
  ],
434
+ "max_steps": 604,
435
  "num_train_epochs": 9223372036854775807,
436
+ "total_flos": 1.5053668400415375e+18,
437
  "trial_name": null,
438
  "trial_params": null
439
  }