Nadav commited on
Commit
d457b33
1 Parent(s): d3124d3

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "_name_or_path": "Nadav/PretrainedPHD",
3
- "architectures": [
4
- "PIXELForPreTraining"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "decoder_hidden_size": 512,
8
- "decoder_intermediate_size": 2048,
9
- "decoder_num_attention_heads": 16,
10
- "decoder_num_hidden_layers": 8,
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 768,
14
- "image_size": [
15
- 368,
16
- 368
17
- ],
18
- "initializer_range": 0.02,
19
- "intermediate_size": 3072,
20
- "layer_norm_eps": 1e-12,
21
- "mask_ratio": 0.29,
22
- "model_type": "pixel",
23
- "norm_pix_loss": true,
24
- "num_attention_heads": 12,
25
- "num_channels": 3,
26
- "num_hidden_layers": 12,
27
- "patch_size": 16,
28
- "qkv_bias": true,
29
- "torch_dtype": "float32",
30
- "transformers_version": "4.17.0"
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3278127396f5d3f8558d8e933cd802503bc96eb286ac4ab8615fdd148bdb7c65
3
- size 893439185
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f55e8b51abb099c47f1d26ef4448005ca8276d7a11a4fd49a19180fc80b2825a
3
- size 449471589
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:88004fc539503ca7b97859aa1d0c5a82fcb7f351b44fb5ff1b5865391c2b3cde
3
- size 15587
 
 
 
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccdf82dc05f3b4b6efaa6b42846dbff856f1303f57b4b6c56f7597dda3131a18
3
- size 559
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:de21128fb4d2d9fbd6335f650a62e3e1299cfe449b8f64957937f253cda36cc0
3
- size 623
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,2736 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.05,
5
- "global_step": 200000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 0.00010077985004622052,
13
- "loss": 0.5572,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.01,
18
- "learning_rate": 0.00010077565027123787,
19
- "loss": 0.5131,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.01,
24
- "learning_rate": 0.00010076865093411392,
25
- "loss": 0.505,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 0.00010075885246660077,
31
- "loss": 0.5001,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.01,
36
- "learning_rate": 0.00010074625547311406,
37
- "loss": 0.4965,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 0.00010073086073069567,
43
- "loss": 0.4937,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.02,
48
- "learning_rate": 0.00010071266918896582,
49
- "loss": 0.4921,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.02,
54
- "learning_rate": 0.0001006916819700645,
55
- "loss": 0.4885,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.02,
60
- "learning_rate": 0.00010066790036858225,
61
- "loss": 0.4872,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.03,
66
- "learning_rate": 0.00010064132585148025,
67
- "loss": 0.4861,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.03,
72
- "eval_loss": 0.45849505066871643,
73
- "eval_runtime": 280.4645,
74
- "eval_samples_per_second": 153.317,
75
- "eval_steps_per_second": 2.396,
76
- "step": 5000
77
- },
78
- {
79
- "epoch": 0.03,
80
- "learning_rate": 0.0001006119600579999,
81
- "loss": 0.4851,
82
- "step": 5500
83
- },
84
- {
85
- "epoch": 0.03,
86
- "learning_rate": 0.00010057980479956167,
87
- "loss": 0.4827,
88
- "step": 6000
89
- },
90
- {
91
- "epoch": 0.03,
92
- "learning_rate": 0.00010054493472563566,
93
- "loss": 0.4814,
94
- "step": 6500
95
- },
96
- {
97
- "epoch": 0.04,
98
- "learning_rate": 0.00010050729045131372,
99
- "loss": 0.4818,
100
- "step": 7000
101
- },
102
- {
103
- "epoch": 0.04,
104
- "learning_rate": 0.0001004667905137074,
105
- "loss": 0.4796,
106
- "step": 7500
107
- },
108
- {
109
- "epoch": 0.04,
110
- "learning_rate": 0.00010042351006588448,
111
- "loss": 0.4787,
112
- "step": 8000
113
- },
114
- {
115
- "epoch": 0.04,
116
- "learning_rate": 0.00010037745177758686,
117
- "loss": 0.4792,
118
- "step": 8500
119
- },
120
- {
121
- "epoch": 0.04,
122
- "learning_rate": 0.00010032861848990674,
123
- "loss": 0.4775,
124
- "step": 9000
125
- },
126
- {
127
- "epoch": 0.05,
128
- "learning_rate": 0.0001002770132151113,
129
- "loss": 0.4767,
130
- "step": 9500
131
- },
132
- {
133
- "epoch": 0.05,
134
- "learning_rate": 0.00010022275064567964,
135
- "loss": 0.4755,
136
- "step": 10000
137
- },
138
- {
139
- "epoch": 0.05,
140
- "eval_loss": 0.44909462332725525,
141
- "eval_runtime": 263.0328,
142
- "eval_samples_per_second": 163.478,
143
- "eval_steps_per_second": 2.555,
144
- "step": 10000
145
- },
146
- {
147
- "epoch": 0.05,
148
- "learning_rate": 0.00010016561664465461,
149
- "loss": 0.4746,
150
- "step": 10500
151
- },
152
- {
153
- "epoch": 0.06,
154
- "learning_rate": 0.00010010572071123591,
155
- "loss": 0.4733,
156
- "step": 11000
157
- },
158
- {
159
- "epoch": 0.06,
160
- "learning_rate": 0.00010004306654008681,
161
- "loss": 0.4721,
162
- "step": 11500
163
- },
164
- {
165
- "epoch": 0.06,
166
- "learning_rate": 9.997765799601176e-05,
167
- "loss": 0.473,
168
- "step": 12000
169
- },
170
- {
171
- "epoch": 0.06,
172
- "learning_rate": 9.990949911371783e-05,
173
- "loss": 0.4708,
174
- "step": 12500
175
- },
176
- {
177
- "epoch": 0.07,
178
- "learning_rate": 9.983859409756594e-05,
179
- "loss": 0.4723,
180
- "step": 13000
181
- },
182
- {
183
- "epoch": 0.07,
184
- "learning_rate": 9.976494732131149e-05,
185
- "loss": 0.4711,
186
- "step": 13500
187
- },
188
- {
189
- "epoch": 0.07,
190
- "learning_rate": 9.968856332783455e-05,
191
- "loss": 0.4713,
192
- "step": 14000
193
- },
194
- {
195
- "epoch": 0.07,
196
- "learning_rate": 9.960944682885961e-05,
197
- "loss": 0.4699,
198
- "step": 14500
199
- },
200
- {
201
- "epoch": 0.07,
202
- "learning_rate": 9.952760270466502e-05,
203
- "loss": 0.4705,
204
- "step": 15000
205
- },
206
- {
207
- "epoch": 0.07,
208
- "eval_loss": 0.44262173771858215,
209
- "eval_runtime": 272.1067,
210
- "eval_samples_per_second": 158.026,
211
- "eval_steps_per_second": 2.47,
212
- "step": 15000
213
- },
214
- {
215
- "epoch": 0.08,
216
- "learning_rate": 9.94430360037819e-05,
217
- "loss": 0.4686,
218
- "step": 15500
219
- },
220
- {
221
- "epoch": 0.08,
222
- "learning_rate": 9.935575194268265e-05,
223
- "loss": 0.469,
224
- "step": 16000
225
- },
226
- {
227
- "epoch": 0.08,
228
- "learning_rate": 9.926612128456279e-05,
229
- "loss": 0.467,
230
- "step": 16500
231
- },
232
- {
233
- "epoch": 0.09,
234
- "learning_rate": 9.917342963701418e-05,
235
- "loss": 0.4674,
236
- "step": 17000
237
- },
238
- {
239
- "epoch": 0.09,
240
- "learning_rate": 9.907803725984013e-05,
241
- "loss": 0.4662,
242
- "step": 17500
243
- },
244
- {
245
- "epoch": 0.09,
246
- "learning_rate": 9.897995003729183e-05,
247
- "loss": 0.4654,
248
- "step": 18000
249
- },
250
- {
251
- "epoch": 0.09,
252
- "learning_rate": 9.887917401985114e-05,
253
- "loss": 0.4647,
254
- "step": 18500
255
- },
256
- {
257
- "epoch": 0.1,
258
- "learning_rate": 9.877592501404869e-05,
259
- "loss": 0.4656,
260
- "step": 19000
261
- },
262
- {
263
- "epoch": 0.1,
264
- "learning_rate": 9.866979556723038e-05,
265
- "loss": 0.465,
266
- "step": 19500
267
- },
268
- {
269
- "epoch": 0.1,
270
- "learning_rate": 9.856099645730841e-05,
271
- "loss": 0.4649,
272
- "step": 20000
273
- },
274
- {
275
- "epoch": 0.1,
276
- "eval_loss": 0.43856754899024963,
277
- "eval_runtime": 269.8512,
278
- "eval_samples_per_second": 159.347,
279
- "eval_steps_per_second": 2.49,
280
- "step": 20000
281
- },
282
- {
283
- "epoch": 0.1,
284
- "learning_rate": 9.844953439552432e-05,
285
- "loss": 0.4627,
286
- "step": 20500
287
- },
288
- {
289
- "epoch": 0.1,
290
- "learning_rate": 9.833564713977207e-05,
291
- "loss": 0.4633,
292
- "step": 21000
293
- },
294
- {
295
- "epoch": 0.11,
296
- "learning_rate": 9.821912141831972e-05,
297
- "loss": 0.4627,
298
- "step": 21500
299
- },
300
- {
301
- "epoch": 0.11,
302
- "learning_rate": 9.809972296167543e-05,
303
- "loss": 0.4613,
304
- "step": 22000
305
- },
306
- {
307
- "epoch": 0.11,
308
- "learning_rate": 9.797769000669104e-05,
309
- "loss": 0.462,
310
- "step": 22500
311
- },
312
- {
313
- "epoch": 0.12,
314
- "learning_rate": 9.785303008093405e-05,
315
- "loss": 0.4605,
316
- "step": 23000
317
- },
318
- {
319
- "epoch": 0.12,
320
- "learning_rate": 9.772575087401588e-05,
321
- "loss": 0.4605,
322
- "step": 23500
323
- },
324
- {
325
- "epoch": 0.12,
326
- "learning_rate": 9.759586023711756e-05,
327
- "loss": 0.4587,
328
- "step": 24000
329
- },
330
- {
331
- "epoch": 0.12,
332
- "learning_rate": 9.746336618250542e-05,
333
- "loss": 0.4596,
334
- "step": 24500
335
- },
336
- {
337
- "epoch": 0.12,
338
- "learning_rate": 9.732827688303682e-05,
339
- "loss": 0.4579,
340
- "step": 25000
341
- },
342
- {
343
- "epoch": 0.12,
344
- "eval_loss": 0.43463194370269775,
345
- "eval_runtime": 278.2026,
346
- "eval_samples_per_second": 154.564,
347
- "eval_steps_per_second": 2.416,
348
- "step": 25000
349
- },
350
- {
351
- "epoch": 0.13,
352
- "learning_rate": 9.71906006716561e-05,
353
- "loss": 0.4588,
354
- "step": 25500
355
- },
356
- {
357
- "epoch": 0.13,
358
- "learning_rate": 9.705034604088048e-05,
359
- "loss": 0.4579,
360
- "step": 26000
361
- },
362
- {
363
- "epoch": 0.13,
364
- "learning_rate": 9.690780984987233e-05,
365
- "loss": 0.4586,
366
- "step": 26500
367
- },
368
- {
369
- "epoch": 0.14,
370
- "learning_rate": 9.67624296065391e-05,
371
- "loss": 0.4577,
372
- "step": 27000
373
- },
374
- {
375
- "epoch": 0.14,
376
- "learning_rate": 9.661449735541914e-05,
377
- "loss": 0.459,
378
- "step": 27500
379
- },
380
- {
381
- "epoch": 0.14,
382
- "learning_rate": 9.646402222167052e-05,
383
- "loss": 0.4576,
384
- "step": 28000
385
- },
386
- {
387
- "epoch": 0.14,
388
- "learning_rate": 9.631101348730831e-05,
389
- "loss": 0.4583,
390
- "step": 28500
391
- },
392
- {
393
- "epoch": 0.14,
394
- "learning_rate": 9.615548059063195e-05,
395
- "loss": 0.4571,
396
- "step": 29000
397
- },
398
- {
399
- "epoch": 0.15,
400
- "learning_rate": 9.599775172365814e-05,
401
- "loss": 0.4558,
402
- "step": 29500
403
- },
404
- {
405
- "epoch": 0.15,
406
- "learning_rate": 9.58375280271162e-05,
407
- "loss": 0.4555,
408
- "step": 30000
409
- },
410
- {
411
- "epoch": 0.15,
412
- "eval_loss": 0.43021583557128906,
413
- "eval_runtime": 269.3626,
414
- "eval_samples_per_second": 159.636,
415
- "eval_steps_per_second": 2.495,
416
- "step": 30000
417
- },
418
- {
419
- "epoch": 0.15,
420
- "learning_rate": 9.567449078707802e-05,
421
- "loss": 0.4555,
422
- "step": 30500
423
- },
424
- {
425
- "epoch": 0.15,
426
- "learning_rate": 9.550896864844414e-05,
427
- "loss": 0.4563,
428
- "step": 31000
429
- },
430
- {
431
- "epoch": 0.16,
432
- "learning_rate": 9.534097182139975e-05,
433
- "loss": 0.4554,
434
- "step": 31500
435
- },
436
- {
437
- "epoch": 0.16,
438
- "learning_rate": 9.517051066878048e-05,
439
- "loss": 0.4547,
440
- "step": 32000
441
- },
442
- {
443
- "epoch": 0.16,
444
- "learning_rate": 9.499759570543318e-05,
445
- "loss": 0.4537,
446
- "step": 32500
447
- },
448
- {
449
- "epoch": 0.17,
450
- "learning_rate": 9.482223759756727e-05,
451
- "loss": 0.4534,
452
- "step": 33000
453
- },
454
- {
455
- "epoch": 0.17,
456
- "learning_rate": 9.464444716209686e-05,
457
- "loss": 0.454,
458
- "step": 33500
459
- },
460
- {
461
- "epoch": 0.17,
462
- "learning_rate": 9.446423536597349e-05,
463
- "loss": 0.4528,
464
- "step": 34000
465
- },
466
- {
467
- "epoch": 0.17,
468
- "learning_rate": 9.42816133255096e-05,
469
- "loss": 0.4523,
470
- "step": 34500
471
- },
472
- {
473
- "epoch": 0.17,
474
- "learning_rate": 9.409659230569288e-05,
475
- "loss": 0.4543,
476
- "step": 35000
477
- },
478
- {
479
- "epoch": 0.17,
480
- "eval_loss": 0.4261643886566162,
481
- "eval_runtime": 271.7099,
482
- "eval_samples_per_second": 158.257,
483
- "eval_steps_per_second": 2.473,
484
- "step": 35000
485
- },
486
- {
487
- "epoch": 0.18,
488
- "learning_rate": 9.390918371949136e-05,
489
- "loss": 0.4533,
490
- "step": 35500
491
- },
492
- {
493
- "epoch": 0.18,
494
- "learning_rate": 9.371978105983142e-05,
495
- "loss": 0.4531,
496
- "step": 36000
497
- },
498
- {
499
- "epoch": 0.18,
500
- "learning_rate": 9.352802352506516e-05,
501
- "loss": 0.4508,
502
- "step": 36500
503
- },
504
- {
505
- "epoch": 0.18,
506
- "learning_rate": 9.333353157269004e-05,
507
- "loss": 0.4512,
508
- "step": 37000
509
- },
510
- {
511
- "epoch": 0.19,
512
- "learning_rate": 9.313669912311046e-05,
513
- "loss": 0.4522,
514
- "step": 37500
515
- },
516
- {
517
- "epoch": 0.19,
518
- "learning_rate": 9.293753831787908e-05,
519
- "loss": 0.4498,
520
- "step": 38000
521
- },
522
- {
523
- "epoch": 0.19,
524
- "learning_rate": 9.27360614421725e-05,
525
- "loss": 0.4491,
526
- "step": 38500
527
- },
528
- {
529
- "epoch": 0.2,
530
- "learning_rate": 9.25326907757735e-05,
531
- "loss": 0.4498,
532
- "step": 39000
533
- },
534
- {
535
- "epoch": 0.2,
536
- "learning_rate": 9.232662375484976e-05,
537
- "loss": 0.4503,
538
- "step": 39500
539
- },
540
- {
541
- "epoch": 0.2,
542
- "learning_rate": 9.211827834753872e-05,
543
- "loss": 0.4498,
544
- "step": 40000
545
- },
546
- {
547
- "epoch": 0.2,
548
- "eval_loss": 0.42189013957977295,
549
- "eval_runtime": 264.7676,
550
- "eval_samples_per_second": 162.407,
551
- "eval_steps_per_second": 2.538,
552
- "step": 40000
553
- },
554
- {
555
- "epoch": 0.0,
556
- "learning_rate": 9.190766740556652e-05,
557
- "loss": 0.4488,
558
- "step": 40500
559
- },
560
- {
561
- "epoch": 0.01,
562
- "learning_rate": 9.169480392040811e-05,
563
- "loss": 0.4482,
564
- "step": 41000
565
- },
566
- {
567
- "epoch": 0.01,
568
- "learning_rate": 9.147970102248595e-05,
569
- "loss": 0.4487,
570
- "step": 41500
571
- },
572
- {
573
- "epoch": 0.01,
574
- "learning_rate": 9.126280885124848e-05,
575
- "loss": 0.4484,
576
- "step": 42000
577
- },
578
- {
579
- "epoch": 0.01,
580
- "learning_rate": 9.104327148280447e-05,
581
- "loss": 0.4469,
582
- "step": 42500
583
- },
584
- {
585
- "epoch": 0.01,
586
- "learning_rate": 9.082153489118645e-05,
587
- "loss": 0.4483,
588
- "step": 43000
589
- },
590
- {
591
- "epoch": 0.02,
592
- "learning_rate": 9.059761275415186e-05,
593
- "loss": 0.4474,
594
- "step": 43500
595
- },
596
- {
597
- "epoch": 0.02,
598
- "learning_rate": 9.037151888427288e-05,
599
- "loss": 0.4483,
600
- "step": 44000
601
- },
602
- {
603
- "epoch": 0.02,
604
- "learning_rate": 9.014372587553127e-05,
605
- "loss": 0.4464,
606
- "step": 44500
607
- },
608
- {
609
- "epoch": 0.03,
610
- "learning_rate": 8.991333478594443e-05,
611
- "loss": 0.4466,
612
- "step": 45000
613
- },
614
- {
615
- "epoch": 0.03,
616
- "eval_loss": 0.4197126626968384,
617
- "eval_runtime": 296.3831,
618
- "eval_samples_per_second": 145.083,
619
- "eval_steps_per_second": 2.267,
620
- "step": 45000
621
- },
622
- {
623
- "epoch": 0.03,
624
- "learning_rate": 8.968081417300147e-05,
625
- "loss": 0.4457,
626
- "step": 45500
627
- },
628
- {
629
- "epoch": 0.03,
630
- "learning_rate": 8.944664975260744e-05,
631
- "loss": 0.4454,
632
- "step": 46000
633
- },
634
- {
635
- "epoch": 0.03,
636
- "learning_rate": 8.920991743920979e-05,
637
- "loss": 0.4466,
638
- "step": 46500
639
- },
640
- {
641
- "epoch": 0.04,
642
- "learning_rate": 8.89710989925514e-05,
643
- "loss": 0.445,
644
- "step": 47000
645
- },
646
- {
647
- "epoch": 0.04,
648
- "learning_rate": 8.8730209144079e-05,
649
- "loss": 0.4453,
650
- "step": 47500
651
- },
652
- {
653
- "epoch": 0.04,
654
- "learning_rate": 8.848726275301312e-05,
655
- "loss": 0.444,
656
- "step": 48000
657
- },
658
- {
659
- "epoch": 0.04,
660
- "learning_rate": 8.824227480543154e-05,
661
- "loss": 0.4442,
662
- "step": 48500
663
- },
664
- {
665
- "epoch": 0.04,
666
- "learning_rate": 8.799526041334489e-05,
667
- "loss": 0.4442,
668
- "step": 49000
669
- },
670
- {
671
- "epoch": 0.05,
672
- "learning_rate": 8.77462348137644e-05,
673
- "loss": 0.4433,
674
- "step": 49500
675
- },
676
- {
677
- "epoch": 0.05,
678
- "learning_rate": 8.749521336776206e-05,
679
- "loss": 0.443,
680
- "step": 50000
681
- },
682
- {
683
- "epoch": 0.05,
684
- "eval_loss": 0.41723188757896423,
685
- "eval_runtime": 268.864,
686
- "eval_samples_per_second": 159.932,
687
- "eval_steps_per_second": 2.499,
688
- "step": 50000
689
- },
690
- {
691
- "epoch": 0.05,
692
- "learning_rate": 8.724271952918828e-05,
693
- "loss": 0.4439,
694
- "step": 50500
695
- },
696
- {
697
- "epoch": 0.06,
698
- "learning_rate": 8.69877568789119e-05,
699
- "loss": 0.4432,
700
- "step": 51000
701
- },
702
- {
703
- "epoch": 0.06,
704
- "learning_rate": 8.673084516870541e-05,
705
- "loss": 0.4437,
706
- "step": 51500
707
- },
708
- {
709
- "epoch": 0.06,
710
- "learning_rate": 8.647251985469168e-05,
711
- "loss": 0.4429,
712
- "step": 52000
713
- },
714
- {
715
- "epoch": 0.06,
716
- "learning_rate": 8.621176150492572e-05,
717
- "loss": 0.4419,
718
- "step": 52500
719
- },
720
- {
721
- "epoch": 0.07,
722
- "learning_rate": 8.594910196227962e-05,
723
- "loss": 0.4422,
724
- "step": 53000
725
- },
726
- {
727
- "epoch": 0.07,
728
- "learning_rate": 8.568455742883119e-05,
729
- "loss": 0.4424,
730
- "step": 53500
731
- },
732
- {
733
- "epoch": 0.07,
734
- "learning_rate": 8.54181442229334e-05,
735
- "loss": 0.4421,
736
- "step": 54000
737
- },
738
- {
739
- "epoch": 0.07,
740
- "learning_rate": 8.51498787782077e-05,
741
- "loss": 0.4431,
742
- "step": 54500
743
- },
744
- {
745
- "epoch": 0.07,
746
- "learning_rate": 8.487977764253034e-05,
747
- "loss": 0.4408,
748
- "step": 55000
749
- },
750
- {
751
- "epoch": 0.07,
752
- "eval_loss": 0.41320380568504333,
753
- "eval_runtime": 276.5136,
754
- "eval_samples_per_second": 155.508,
755
- "eval_steps_per_second": 2.43,
756
- "step": 55000
757
- },
758
- {
759
- "epoch": 0.08,
760
- "learning_rate": 8.460785747701169e-05,
761
- "loss": 0.4411,
762
- "step": 55500
763
- },
764
- {
765
- "epoch": 0.08,
766
- "learning_rate": 8.433468428726034e-05,
767
- "loss": 0.4405,
768
- "step": 56000
769
- },
770
- {
771
- "epoch": 0.08,
772
- "learning_rate": 8.405918004700074e-05,
773
- "loss": 0.4399,
774
- "step": 56500
775
- },
776
- {
777
- "epoch": 0.09,
778
- "learning_rate": 8.378190739522489e-05,
779
- "loss": 0.4399,
780
- "step": 57000
781
- },
782
- {
783
- "epoch": 0.09,
784
- "learning_rate": 8.350288343541602e-05,
785
- "loss": 0.4396,
786
- "step": 57500
787
- },
788
- {
789
- "epoch": 0.09,
790
- "learning_rate": 8.322212537908629e-05,
791
- "loss": 0.4381,
792
- "step": 58000
793
- },
794
- {
795
- "epoch": 0.09,
796
- "learning_rate": 8.29396505447151e-05,
797
- "loss": 0.439,
798
- "step": 58500
799
- },
800
- {
801
- "epoch": 0.1,
802
- "learning_rate": 8.265547635668083e-05,
803
- "loss": 0.4398,
804
- "step": 59000
805
- },
806
- {
807
- "epoch": 0.1,
808
- "learning_rate": 8.236962034418596e-05,
809
- "loss": 0.4377,
810
- "step": 59500
811
- },
812
- {
813
- "epoch": 0.1,
814
- "learning_rate": 8.208267682967567e-05,
815
- "loss": 0.4393,
816
- "step": 60000
817
- },
818
- {
819
- "epoch": 0.1,
820
- "eval_loss": 0.41322511434555054,
821
- "eval_runtime": 270.2321,
822
- "eval_samples_per_second": 159.122,
823
- "eval_steps_per_second": 2.487,
824
- "step": 60000
825
- },
826
- {
827
- "epoch": 0.1,
828
- "learning_rate": 8.179409340302407e-05,
829
- "loss": 0.4383,
830
- "step": 60500
831
- },
832
- {
833
- "epoch": 0.1,
834
- "learning_rate": 8.150330460315606e-05,
835
- "loss": 0.4384,
836
- "step": 61000
837
- },
838
- {
839
- "epoch": 0.11,
840
- "learning_rate": 8.121090505020843e-05,
841
- "loss": 0.4387,
842
- "step": 61500
843
- },
844
- {
845
- "epoch": 0.11,
846
- "learning_rate": 8.091691278076293e-05,
847
- "loss": 0.4368,
848
- "step": 62000
849
- },
850
- {
851
- "epoch": 0.11,
852
- "learning_rate": 8.062134592964756e-05,
853
- "loss": 0.4377,
854
- "step": 62500
855
- },
856
- {
857
- "epoch": 0.12,
858
- "learning_rate": 8.032481851629047e-05,
859
- "loss": 0.4381,
860
- "step": 63000
861
- },
862
- {
863
- "epoch": 0.12,
864
- "learning_rate": 8.00267591904771e-05,
865
- "loss": 0.4371,
866
- "step": 63500
867
- },
868
- {
869
- "epoch": 0.12,
870
- "learning_rate": 7.972658441054002e-05,
871
- "loss": 0.4376,
872
- "step": 64000
873
- },
874
- {
875
- "epoch": 0.12,
876
- "learning_rate": 7.94249084739939e-05,
877
- "loss": 0.4359,
878
- "step": 64500
879
- },
880
- {
881
- "epoch": 0.12,
882
- "learning_rate": 7.912174998963153e-05,
883
- "loss": 0.4359,
884
- "step": 65000
885
- },
886
- {
887
- "epoch": 0.12,
888
- "eval_loss": 0.41053175926208496,
889
- "eval_runtime": 268.5256,
890
- "eval_samples_per_second": 160.134,
891
- "eval_steps_per_second": 2.503,
892
- "step": 65000
893
- },
894
- {
895
- "epoch": 0.13,
896
- "learning_rate": 7.88171276576962e-05,
897
- "loss": 0.4359,
898
- "step": 65500
899
- },
900
- {
901
- "epoch": 0.13,
902
- "learning_rate": 7.851167383313914e-05,
903
- "loss": 0.4372,
904
- "step": 66000
905
- },
906
- {
907
- "epoch": 0.13,
908
- "learning_rate": 7.820418310027099e-05,
909
- "loss": 0.436,
910
- "step": 66500
911
- },
912
- {
913
- "epoch": 0.14,
914
- "learning_rate": 7.789528511967774e-05,
915
- "loss": 0.4355,
916
- "step": 67000
917
- },
918
- {
919
- "epoch": 0.14,
920
- "learning_rate": 7.758499894564191e-05,
921
- "loss": 0.4333,
922
- "step": 67500
923
- },
924
- {
925
- "epoch": 0.14,
926
- "learning_rate": 7.727334371807629e-05,
927
- "loss": 0.4338,
928
- "step": 68000
929
- },
930
- {
931
- "epoch": 0.14,
932
- "learning_rate": 7.696033866134332e-05,
933
- "loss": 0.4355,
934
- "step": 68500
935
- },
936
- {
937
- "epoch": 0.14,
938
- "learning_rate": 7.664600308306929e-05,
939
- "loss": 0.4351,
940
- "step": 69000
941
- },
942
- {
943
- "epoch": 0.15,
944
- "learning_rate": 7.633035637295333e-05,
945
- "loss": 0.4349,
946
- "step": 69500
947
- },
948
- {
949
- "epoch": 0.15,
950
- "learning_rate": 7.601341800157135e-05,
951
- "loss": 0.4345,
952
- "step": 70000
953
- },
954
- {
955
- "epoch": 0.15,
956
- "eval_loss": 0.4069764316082001,
957
- "eval_runtime": 286.0485,
958
- "eval_samples_per_second": 150.324,
959
- "eval_steps_per_second": 2.349,
960
- "step": 70000
961
- },
962
- {
963
- "epoch": 0.15,
964
- "learning_rate": 7.569584519667294e-05,
965
- "loss": 0.4335,
966
- "step": 70500
967
- },
968
- {
969
- "epoch": 0.15,
970
- "learning_rate": 7.537702487519748e-05,
971
- "loss": 0.4336,
972
- "step": 71000
973
- },
974
- {
975
- "epoch": 0.16,
976
- "learning_rate": 7.505633402594554e-05,
977
- "loss": 0.4338,
978
- "step": 71500
979
- },
980
- {
981
- "epoch": 0.16,
982
- "learning_rate": 7.473443010312711e-05,
983
- "loss": 0.4342,
984
- "step": 72000
985
- },
986
- {
987
- "epoch": 0.16,
988
- "learning_rate": 7.441133296329247e-05,
989
- "loss": 0.4325,
990
- "step": 72500
991
- },
992
- {
993
- "epoch": 0.17,
994
- "learning_rate": 7.408771223510569e-05,
995
- "loss": 0.433,
996
- "step": 73000
997
- },
998
- {
999
- "epoch": 0.17,
1000
- "learning_rate": 7.376229081063164e-05,
1001
- "loss": 0.4322,
1002
- "step": 73500
1003
- },
1004
- {
1005
- "epoch": 0.17,
1006
- "learning_rate": 7.343573613527213e-05,
1007
- "loss": 0.4323,
1008
- "step": 74000
1009
- },
1010
- {
1011
- "epoch": 0.17,
1012
- "learning_rate": 7.310872478548158e-05,
1013
- "loss": 0.4329,
1014
- "step": 74500
1015
- },
1016
- {
1017
- "epoch": 0.17,
1018
- "learning_rate": 7.277996627287863e-05,
1019
- "loss": 0.4321,
1020
- "step": 75000
1021
- },
1022
- {
1023
- "epoch": 0.17,
1024
- "eval_loss": 0.40518081188201904,
1025
- "eval_runtime": 272.4309,
1026
- "eval_samples_per_second": 157.838,
1027
- "eval_steps_per_second": 2.467,
1028
- "step": 75000
1029
- },
1030
- {
1031
- "epoch": 0.18,
1032
- "learning_rate": 7.245013510379486e-05,
1033
- "loss": 0.4324,
1034
- "step": 75500
1035
- },
1036
- {
1037
- "epoch": 0.18,
1038
- "learning_rate": 7.211925162377042e-05,
1039
- "loss": 0.4305,
1040
- "step": 76000
1041
- },
1042
- {
1043
- "epoch": 0.18,
1044
- "learning_rate": 7.178733624325697e-05,
1045
- "loss": 0.4312,
1046
- "step": 76500
1047
- },
1048
- {
1049
- "epoch": 0.18,
1050
- "learning_rate": 7.145440943635861e-05,
1051
- "loss": 0.4318,
1052
- "step": 77000
1053
- },
1054
- {
1055
- "epoch": 0.19,
1056
- "learning_rate": 7.112049173956905e-05,
1057
- "loss": 0.43,
1058
- "step": 77500
1059
- },
1060
- {
1061
- "epoch": 0.19,
1062
- "learning_rate": 7.07856037505047e-05,
1063
- "loss": 0.431,
1064
- "step": 78000
1065
- },
1066
- {
1067
- "epoch": 0.19,
1068
- "learning_rate": 7.04497661266342e-05,
1069
- "loss": 0.4302,
1070
- "step": 78500
1071
- },
1072
- {
1073
- "epoch": 0.2,
1074
- "learning_rate": 7.01129995840041e-05,
1075
- "loss": 0.4288,
1076
- "step": 79000
1077
- },
1078
- {
1079
- "epoch": 0.2,
1080
- "learning_rate": 6.97753248959611e-05,
1081
- "loss": 0.4321,
1082
- "step": 79500
1083
- },
1084
- {
1085
- "epoch": 0.2,
1086
- "learning_rate": 6.943676289187054e-05,
1087
- "loss": 0.4288,
1088
- "step": 80000
1089
- },
1090
- {
1091
- "epoch": 0.2,
1092
- "eval_loss": 0.4031592011451721,
1093
- "eval_runtime": 278.2873,
1094
- "eval_samples_per_second": 154.517,
1095
- "eval_steps_per_second": 2.415,
1096
- "step": 80000
1097
- },
1098
- {
1099
- "epoch": 0.0,
1100
- "learning_rate": 6.9098014163495e-05,
1101
- "loss": 0.4297,
1102
- "step": 80500
1103
- },
1104
- {
1105
- "epoch": 0.01,
1106
- "learning_rate": 6.875774190311069e-05,
1107
- "loss": 0.429,
1108
- "step": 81000
1109
- },
1110
- {
1111
- "epoch": 0.01,
1112
- "learning_rate": 6.841732809852635e-05,
1113
- "loss": 0.4286,
1114
- "step": 81500
1115
- },
1116
- {
1117
- "epoch": 0.01,
1118
- "learning_rate": 6.80754293711102e-05,
1119
- "loss": 0.4287,
1120
- "step": 82000
1121
- },
1122
- {
1123
- "epoch": 0.01,
1124
- "learning_rate": 6.773274818520966e-05,
1125
- "loss": 0.4273,
1126
- "step": 82500
1127
- },
1128
- {
1129
- "epoch": 0.01,
1130
- "learning_rate": 6.738930567901446e-05,
1131
- "loss": 0.4277,
1132
- "step": 83000
1133
- },
1134
- {
1135
- "epoch": 0.02,
1136
- "learning_rate": 6.704512303767616e-05,
1137
- "loss": 0.4283,
1138
- "step": 83500
1139
- },
1140
- {
1141
- "epoch": 0.02,
1142
- "learning_rate": 6.67002214920013e-05,
1143
- "loss": 0.4286,
1144
- "step": 84000
1145
- },
1146
- {
1147
- "epoch": 0.02,
1148
- "learning_rate": 6.635462231714186e-05,
1149
- "loss": 0.4278,
1150
- "step": 84500
1151
- },
1152
- {
1153
- "epoch": 0.03,
1154
- "learning_rate": 6.600904004302253e-05,
1155
- "loss": 0.4269,
1156
- "step": 85000
1157
- },
1158
- {
1159
- "epoch": 0.03,
1160
- "eval_loss": 0.40107953548431396,
1161
- "eval_runtime": 324.3295,
1162
- "eval_samples_per_second": 132.581,
1163
- "eval_steps_per_second": 2.072,
1164
- "step": 85000
1165
- },
1166
- {
1167
- "epoch": 0.03,
1168
- "learning_rate": 6.566211089462497e-05,
1169
- "loss": 0.4278,
1170
- "step": 85500
1171
- },
1172
- {
1173
- "epoch": 0.03,
1174
- "learning_rate": 6.531454815259442e-05,
1175
- "loss": 0.4275,
1176
- "step": 86000
1177
- },
1178
- {
1179
- "epoch": 0.03,
1180
- "learning_rate": 6.496637325623796e-05,
1181
- "loss": 0.4251,
1182
- "step": 86500
1183
- },
1184
- {
1185
- "epoch": 0.04,
1186
- "learning_rate": 6.461760768262325e-05,
1187
- "loss": 0.4271,
1188
- "step": 87000
1189
- },
1190
- {
1191
- "epoch": 0.04,
1192
- "learning_rate": 6.426897216843693e-05,
1193
- "loss": 0.4261,
1194
- "step": 87500
1195
- },
1196
- {
1197
- "epoch": 0.04,
1198
- "learning_rate": 6.391909088962678e-05,
1199
- "loss": 0.4262,
1200
- "step": 88000
1201
- },
1202
- {
1203
- "epoch": 0.04,
1204
- "learning_rate": 6.356868353486788e-05,
1205
- "loss": 0.4275,
1206
- "step": 88500
1207
- },
1208
- {
1209
- "epoch": 0.04,
1210
- "learning_rate": 6.321777171893646e-05,
1211
- "loss": 0.4251,
1212
- "step": 89000
1213
- },
1214
- {
1215
- "epoch": 0.05,
1216
- "learning_rate": 6.286637708772627e-05,
1217
- "loss": 0.4256,
1218
- "step": 89500
1219
- },
1220
- {
1221
- "epoch": 0.05,
1222
- "learning_rate": 6.251452131691338e-05,
1223
- "loss": 0.4246,
1224
- "step": 90000
1225
- },
1226
- {
1227
- "epoch": 0.05,
1228
- "eval_loss": 0.399631530046463,
1229
- "eval_runtime": 304.7497,
1230
- "eval_samples_per_second": 141.099,
1231
- "eval_steps_per_second": 2.205,
1232
- "step": 90000
1233
- },
1234
- {
1235
- "epoch": 0.05,
1236
- "learning_rate": 6.216293112514862e-05,
1237
- "loss": 0.4251,
1238
- "step": 90500
1239
- },
1240
- {
1241
- "epoch": 0.06,
1242
- "learning_rate": 6.181021902830414e-05,
1243
- "loss": 0.4251,
1244
- "step": 91000
1245
- },
1246
- {
1247
- "epoch": 0.06,
1248
- "learning_rate": 6.145711094066101e-05,
1249
- "loss": 0.4256,
1250
- "step": 91500
1251
- },
1252
- {
1253
- "epoch": 0.06,
1254
- "learning_rate": 6.110362864358936e-05,
1255
- "loss": 0.4232,
1256
- "step": 92000
1257
- },
1258
- {
1259
- "epoch": 0.06,
1260
- "learning_rate": 6.07505019481429e-05,
1261
- "loss": 0.4244,
1262
- "step": 92500
1263
- },
1264
- {
1265
- "epoch": 0.07,
1266
- "learning_rate": 6.039633730667286e-05,
1267
- "loss": 0.4252,
1268
- "step": 93000
1269
- },
1270
- {
1271
- "epoch": 0.07,
1272
- "learning_rate": 6.004186388928874e-05,
1273
- "loss": 0.4234,
1274
- "step": 93500
1275
- },
1276
- {
1277
- "epoch": 0.07,
1278
- "learning_rate": 5.968710356158062e-05,
1279
- "loss": 0.4232,
1280
- "step": 94000
1281
- },
1282
- {
1283
- "epoch": 0.07,
1284
- "learning_rate": 5.933207820683662e-05,
1285
- "loss": 0.4229,
1286
- "step": 94500
1287
- },
1288
- {
1289
- "epoch": 0.07,
1290
- "learning_rate": 5.897752048973475e-05,
1291
- "loss": 0.4227,
1292
- "step": 95000
1293
- },
1294
- {
1295
- "epoch": 0.07,
1296
- "eval_loss": 0.3970061242580414,
1297
- "eval_runtime": 304.9641,
1298
- "eval_samples_per_second": 141.0,
1299
- "eval_steps_per_second": 2.204,
1300
- "step": 95000
1301
- },
1302
- {
1303
- "epoch": 0.08,
1304
- "learning_rate": 5.8622031215370216e-05,
1305
- "loss": 0.4228,
1306
- "step": 95500
1307
- },
1308
- {
1309
- "epoch": 0.08,
1310
- "learning_rate": 5.826634261264905e-05,
1311
- "loss": 0.4215,
1312
- "step": 96000
1313
- },
1314
- {
1315
- "epoch": 0.08,
1316
- "learning_rate": 5.7910476622119674e-05,
1317
- "loss": 0.4227,
1318
- "step": 96500
1319
- },
1320
- {
1321
- "epoch": 0.09,
1322
- "learning_rate": 5.7554455195272715e-05,
1323
- "loss": 0.4227,
1324
- "step": 97000
1325
- },
1326
- {
1327
- "epoch": 0.09,
1328
- "learning_rate": 5.7198300293186744e-05,
1329
- "loss": 0.4218,
1330
- "step": 97500
1331
- },
1332
- {
1333
- "epoch": 0.09,
1334
- "learning_rate": 5.684203388517376e-05,
1335
- "loss": 0.4214,
1336
- "step": 98000
1337
- },
1338
- {
1339
- "epoch": 0.09,
1340
- "learning_rate": 5.6485677947424015e-05,
1341
- "loss": 0.4217,
1342
- "step": 98500
1343
- },
1344
- {
1345
- "epoch": 0.1,
1346
- "learning_rate": 5.612996736142203e-05,
1347
- "loss": 0.4224,
1348
- "step": 99000
1349
- },
1350
- {
1351
- "epoch": 0.1,
1352
- "learning_rate": 5.577421135154171e-05,
1353
- "loss": 0.4213,
1354
- "step": 99500
1355
- },
1356
- {
1357
- "epoch": 0.1,
1358
- "learning_rate": 5.5417718780659065e-05,
1359
- "loss": 0.423,
1360
- "step": 100000
1361
- },
1362
- {
1363
- "epoch": 0.1,
1364
- "eval_loss": 0.395292192697525,
1365
- "eval_runtime": 296.1319,
1366
- "eval_samples_per_second": 145.206,
1367
- "eval_steps_per_second": 2.269,
1368
- "step": 100000
1369
- },
1370
- {
1371
- "epoch": 0.1,
1372
- "learning_rate": 5.50612245385044e-05,
1373
- "loss": 0.4212,
1374
- "step": 100500
1375
- },
1376
- {
1377
- "epoch": 0.1,
1378
- "learning_rate": 5.470475061532183e-05,
1379
- "loss": 0.4202,
1380
- "step": 101000
1381
- },
1382
- {
1383
- "epoch": 0.11,
1384
- "learning_rate": 5.4348319000102045e-05,
1385
- "loss": 0.4217,
1386
- "step": 101500
1387
- },
1388
- {
1389
- "epoch": 0.11,
1390
- "learning_rate": 5.399266433509025e-05,
1391
- "loss": 0.4193,
1392
- "step": 102000
1393
- },
1394
- {
1395
- "epoch": 0.11,
1396
- "learning_rate": 5.363638309648435e-05,
1397
- "loss": 0.4214,
1398
- "step": 102500
1399
- },
1400
- {
1401
- "epoch": 0.12,
1402
- "learning_rate": 5.328021006778228e-05,
1403
- "loss": 0.4197,
1404
- "step": 103000
1405
- },
1406
- {
1407
- "epoch": 0.12,
1408
- "learning_rate": 5.292416721941417e-05,
1409
- "loss": 0.4197,
1410
- "step": 103500
1411
- },
1412
- {
1413
- "epoch": 0.12,
1414
- "learning_rate": 5.2568988128759686e-05,
1415
- "loss": 0.4195,
1416
- "step": 104000
1417
- },
1418
- {
1419
- "epoch": 0.12,
1420
- "learning_rate": 5.2213271148780935e-05,
1421
- "loss": 0.4199,
1422
- "step": 104500
1423
- },
1424
- {
1425
- "epoch": 0.12,
1426
- "learning_rate": 5.1857750162954374e-05,
1427
- "loss": 0.4206,
1428
- "step": 105000
1429
- },
1430
- {
1431
- "epoch": 0.12,
1432
- "eval_loss": 0.394319623708725,
1433
- "eval_runtime": 306.4174,
1434
- "eval_samples_per_second": 140.331,
1435
- "eval_steps_per_second": 2.193,
1436
- "step": 105000
1437
- },
1438
- {
1439
- "epoch": 0.13,
1440
- "learning_rate": 5.1502447101489115e-05,
1441
- "loss": 0.4185,
1442
- "step": 105500
1443
- },
1444
- {
1445
- "epoch": 0.13,
1446
- "learning_rate": 5.114738388115157e-05,
1447
- "loss": 0.4184,
1448
- "step": 106000
1449
- },
1450
- {
1451
- "epoch": 0.13,
1452
- "learning_rate": 5.079329173109894e-05,
1453
- "loss": 0.4179,
1454
- "step": 106500
1455
- },
1456
- {
1457
- "epoch": 0.14,
1458
- "learning_rate": 5.043877329369859e-05,
1459
- "loss": 0.4199,
1460
- "step": 107000
1461
- },
1462
- {
1463
- "epoch": 0.14,
1464
- "learning_rate": 5.008456030983681e-05,
1465
- "loss": 0.419,
1466
- "step": 107500
1467
- },
1468
- {
1469
- "epoch": 0.14,
1470
- "learning_rate": 4.973067462903889e-05,
1471
- "loss": 0.4167,
1472
- "step": 108000
1473
- },
1474
- {
1475
- "epoch": 0.14,
1476
- "learning_rate": 4.937784479080462e-05,
1477
- "loss": 0.417,
1478
- "step": 108500
1479
- },
1480
- {
1481
- "epoch": 0.14,
1482
- "learning_rate": 4.902467841897598e-05,
1483
- "loss": 0.418,
1484
- "step": 109000
1485
- },
1486
- {
1487
- "epoch": 0.15,
1488
- "learning_rate": 4.867190472871874e-05,
1489
- "loss": 0.4177,
1490
- "step": 109500
1491
- },
1492
- {
1493
- "epoch": 0.15,
1494
- "learning_rate": 4.8319545480775854e-05,
1495
- "loss": 0.4172,
1496
- "step": 110000
1497
- },
1498
- {
1499
- "epoch": 0.15,
1500
- "eval_loss": 0.3924800455570221,
1501
- "eval_runtime": 289.0122,
1502
- "eval_samples_per_second": 148.783,
1503
- "eval_steps_per_second": 2.325,
1504
- "step": 110000
1505
- },
1506
- {
1507
- "epoch": 0.15,
1508
- "learning_rate": 4.796762241032546e-05,
1509
- "loss": 0.4173,
1510
- "step": 110500
1511
- },
1512
- {
1513
- "epoch": 0.15,
1514
- "learning_rate": 4.7616157225640255e-05,
1515
- "loss": 0.417,
1516
- "step": 111000
1517
- },
1518
- {
1519
- "epoch": 0.16,
1520
- "learning_rate": 4.7265873084984087e-05,
1521
- "loss": 0.4173,
1522
- "step": 111500
1523
- },
1524
- {
1525
- "epoch": 0.16,
1526
- "learning_rate": 4.6915387658312535e-05,
1527
- "loss": 0.416,
1528
- "step": 112000
1529
- },
1530
- {
1531
- "epoch": 0.16,
1532
- "learning_rate": 4.656542502420213e-05,
1533
- "loss": 0.4165,
1534
- "step": 112500
1535
- },
1536
- {
1537
- "epoch": 0.17,
1538
- "learning_rate": 4.621600676999665e-05,
1539
- "loss": 0.4163,
1540
- "step": 113000
1541
- },
1542
- {
1543
- "epoch": 0.17,
1544
- "learning_rate": 4.586715444945989e-05,
1545
- "loss": 0.4161,
1546
- "step": 113500
1547
- },
1548
- {
1549
- "epoch": 0.17,
1550
- "learning_rate": 4.551888958144626e-05,
1551
- "loss": 0.417,
1552
- "step": 114000
1553
- },
1554
- {
1555
- "epoch": 0.17,
1556
- "learning_rate": 4.517123364857326e-05,
1557
- "loss": 0.4158,
1558
- "step": 114500
1559
- },
1560
- {
1561
- "epoch": 0.17,
1562
- "learning_rate": 4.4824208095896454e-05,
1563
- "loss": 0.416,
1564
- "step": 115000
1565
- },
1566
- {
1567
- "epoch": 0.17,
1568
- "eval_loss": 0.39157894253730774,
1569
- "eval_runtime": 277.9435,
1570
- "eval_samples_per_second": 154.708,
1571
- "eval_steps_per_second": 2.418,
1572
- "step": 115000
1573
- },
1574
- {
1575
- "epoch": 0.18,
1576
- "learning_rate": 4.4477834329586547e-05,
1577
- "loss": 0.4148,
1578
- "step": 115500
1579
- },
1580
- {
1581
- "epoch": 0.18,
1582
- "learning_rate": 4.4132133715609044e-05,
1583
- "loss": 0.4157,
1584
- "step": 116000
1585
- },
1586
- {
1587
- "epoch": 0.18,
1588
- "learning_rate": 4.378712757840617e-05,
1589
- "loss": 0.4154,
1590
- "step": 116500
1591
- },
1592
- {
1593
- "epoch": 0.18,
1594
- "learning_rate": 4.344283719958163e-05,
1595
- "loss": 0.4156,
1596
- "step": 117000
1597
- },
1598
- {
1599
- "epoch": 0.19,
1600
- "learning_rate": 4.3099283816587726e-05,
1601
- "loss": 0.415,
1602
- "step": 117500
1603
- },
1604
- {
1605
- "epoch": 0.19,
1606
- "learning_rate": 4.2757173441071825e-05,
1607
- "loss": 0.4143,
1608
- "step": 118000
1609
- },
1610
- {
1611
- "epoch": 0.19,
1612
- "learning_rate": 4.241515599920613e-05,
1613
- "loss": 0.4152,
1614
- "step": 118500
1615
- },
1616
- {
1617
- "epoch": 0.2,
1618
- "learning_rate": 4.207462056671048e-05,
1619
- "loss": 0.4138,
1620
- "step": 119000
1621
- },
1622
- {
1623
- "epoch": 0.2,
1624
- "learning_rate": 4.173422328497157e-05,
1625
- "loss": 0.4143,
1626
- "step": 119500
1627
- },
1628
- {
1629
- "epoch": 0.2,
1630
- "learning_rate": 4.139466839441784e-05,
1631
- "loss": 0.4154,
1632
- "step": 120000
1633
- },
1634
- {
1635
- "epoch": 0.2,
1636
- "eval_loss": 0.3890155851840973,
1637
- "eval_runtime": 291.4194,
1638
- "eval_samples_per_second": 147.554,
1639
- "eval_steps_per_second": 2.306,
1640
- "step": 120000
1641
- },
1642
- {
1643
- "epoch": 0.0,
1644
- "learning_rate": 4.105597684039436e-05,
1645
- "loss": 0.4022,
1646
- "step": 120500
1647
- },
1648
- {
1649
- "epoch": 0.01,
1650
- "learning_rate": 4.071816951499159e-05,
1651
- "loss": 0.3996,
1652
- "step": 121000
1653
- },
1654
- {
1655
- "epoch": 0.01,
1656
- "learning_rate": 4.038194014319665e-05,
1657
- "loss": 0.3976,
1658
- "step": 121500
1659
- },
1660
- {
1661
- "epoch": 0.01,
1662
- "learning_rate": 4.00459618594495e-05,
1663
- "loss": 0.3971,
1664
- "step": 122000
1665
- },
1666
- {
1667
- "epoch": 0.01,
1668
- "learning_rate": 3.971093010680468e-05,
1669
- "loss": 0.3963,
1670
- "step": 122500
1671
- },
1672
- {
1673
- "epoch": 0.01,
1674
- "learning_rate": 3.937686555159882e-05,
1675
- "loss": 0.3957,
1676
- "step": 123000
1677
- },
1678
- {
1679
- "epoch": 0.02,
1680
- "learning_rate": 3.9044453954514625e-05,
1681
- "loss": 0.395,
1682
- "step": 123500
1683
- },
1684
- {
1685
- "epoch": 0.02,
1686
- "learning_rate": 3.871238351611711e-05,
1687
- "loss": 0.3956,
1688
- "step": 124000
1689
- },
1690
- {
1691
- "epoch": 0.02,
1692
- "learning_rate": 3.83820029131894e-05,
1693
- "loss": 0.3954,
1694
- "step": 124500
1695
- },
1696
- {
1697
- "epoch": 0.03,
1698
- "learning_rate": 3.805200836123052e-05,
1699
- "loss": 0.394,
1700
- "step": 125000
1701
- },
1702
- {
1703
- "epoch": 0.03,
1704
- "eval_loss": 0.3622290790081024,
1705
- "eval_runtime": 282.4866,
1706
- "eval_samples_per_second": 152.22,
1707
- "eval_steps_per_second": 2.379,
1708
- "step": 125000
1709
- },
1710
- {
1711
- "epoch": 0.03,
1712
- "learning_rate": 3.772308333681807e-05,
1713
- "loss": 0.3947,
1714
- "step": 125500
1715
- },
1716
- {
1717
- "epoch": 0.03,
1718
- "learning_rate": 3.739524812959698e-05,
1719
- "loss": 0.3934,
1720
- "step": 126000
1721
- },
1722
- {
1723
- "epoch": 0.03,
1724
- "learning_rate": 3.7068522961987034e-05,
1725
- "loss": 0.3918,
1726
- "step": 126500
1727
- },
1728
- {
1729
- "epoch": 0.04,
1730
- "learning_rate": 3.6742927987935615e-05,
1731
- "loss": 0.3942,
1732
- "step": 127000
1733
- },
1734
- {
1735
- "epoch": 0.04,
1736
- "learning_rate": 3.6418483291674376e-05,
1737
- "loss": 0.3934,
1738
- "step": 127500
1739
- },
1740
- {
1741
- "epoch": 0.04,
1742
- "learning_rate": 3.6095208886480486e-05,
1743
- "loss": 0.3927,
1744
- "step": 128000
1745
- },
1746
- {
1747
- "epoch": 0.04,
1748
- "learning_rate": 3.577312471344201e-05,
1749
- "loss": 0.3949,
1750
- "step": 128500
1751
- },
1752
- {
1753
- "epoch": 0.04,
1754
- "learning_rate": 3.545225064022787e-05,
1755
- "loss": 0.3927,
1756
- "step": 129000
1757
- },
1758
- {
1759
- "epoch": 0.05,
1760
- "learning_rate": 3.513324450767193e-05,
1761
- "loss": 0.3933,
1762
- "step": 129500
1763
- },
1764
- {
1765
- "epoch": 0.05,
1766
- "learning_rate": 3.481484741846708e-05,
1767
- "loss": 0.3911,
1768
- "step": 130000
1769
- },
1770
- {
1771
- "epoch": 0.05,
1772
- "eval_loss": 0.3602633476257324,
1773
- "eval_runtime": 265.9858,
1774
- "eval_samples_per_second": 161.663,
1775
- "eval_steps_per_second": 2.526,
1776
- "step": 130000
1777
- },
1778
- {
1779
- "epoch": 0.05,
1780
- "learning_rate": 3.449771954014401e-05,
1781
- "loss": 0.393,
1782
- "step": 130500
1783
- },
1784
- {
1785
- "epoch": 0.06,
1786
- "learning_rate": 3.418188043464409e-05,
1787
- "loss": 0.3919,
1788
- "step": 131000
1789
- },
1790
- {
1791
- "epoch": 0.06,
1792
- "learning_rate": 3.3867977327563496e-05,
1793
- "loss": 0.3922,
1794
- "step": 131500
1795
- },
1796
- {
1797
- "epoch": 0.06,
1798
- "learning_rate": 3.3554771459719055e-05,
1799
- "loss": 0.3908,
1800
- "step": 132000
1801
- },
1802
- {
1803
- "epoch": 0.06,
1804
- "learning_rate": 3.324291253017703e-05,
1805
- "loss": 0.39,
1806
- "step": 132500
1807
- },
1808
- {
1809
- "epoch": 0.07,
1810
- "learning_rate": 3.29330393851866e-05,
1811
- "loss": 0.391,
1812
- "step": 133000
1813
- },
1814
- {
1815
- "epoch": 0.07,
1816
- "learning_rate": 3.262454599427242e-05,
1817
- "loss": 0.39,
1818
- "step": 133500
1819
- },
1820
- {
1821
- "epoch": 0.07,
1822
- "learning_rate": 3.231683730748652e-05,
1823
- "loss": 0.3907,
1824
- "step": 134000
1825
- },
1826
- {
1827
- "epoch": 0.07,
1828
- "learning_rate": 3.201055192061272e-05,
1829
- "loss": 0.3888,
1830
- "step": 134500
1831
- },
1832
- {
1833
- "epoch": 0.07,
1834
- "learning_rate": 3.170570872677642e-05,
1835
- "loss": 0.3891,
1836
- "step": 135000
1837
- },
1838
- {
1839
- "epoch": 0.07,
1840
- "eval_loss": 0.35786134004592896,
1841
- "eval_runtime": 272.1536,
1842
- "eval_samples_per_second": 157.999,
1843
- "eval_steps_per_second": 2.469,
1844
- "step": 135000
1845
- },
1846
- {
1847
- "epoch": 0.08,
1848
- "learning_rate": 3.1402326530141794e-05,
1849
- "loss": 0.3904,
1850
- "step": 135500
1851
- },
1852
- {
1853
- "epoch": 0.08,
1854
- "learning_rate": 3.110042404475174e-05,
1855
- "loss": 0.3873,
1856
- "step": 136000
1857
- },
1858
- {
1859
- "epoch": 0.08,
1860
- "learning_rate": 3.080001989337368e-05,
1861
- "loss": 0.3885,
1862
- "step": 136500
1863
- },
1864
- {
1865
- "epoch": 0.09,
1866
- "learning_rate": 3.050113260635069e-05,
1867
- "loss": 0.3892,
1868
- "step": 137000
1869
- },
1870
- {
1871
- "epoch": 0.09,
1872
- "learning_rate": 3.0203780620458514e-05,
1873
- "loss": 0.3891,
1874
- "step": 137500
1875
- },
1876
- {
1877
- "epoch": 0.09,
1878
- "learning_rate": 2.990798227776831e-05,
1879
- "loss": 0.3884,
1880
- "step": 138000
1881
- },
1882
- {
1883
- "epoch": 0.09,
1884
- "learning_rate": 2.9613755824515164e-05,
1885
- "loss": 0.3893,
1886
- "step": 138500
1887
- },
1888
- {
1889
- "epoch": 0.1,
1890
- "learning_rate": 2.9321119409972645e-05,
1891
- "loss": 0.389,
1892
- "step": 139000
1893
- },
1894
- {
1895
- "epoch": 0.1,
1896
- "learning_rate": 2.903009108533329e-05,
1897
- "loss": 0.3884,
1898
- "step": 139500
1899
- },
1900
- {
1901
- "epoch": 0.1,
1902
- "learning_rate": 2.8740688802594957e-05,
1903
- "loss": 0.3892,
1904
- "step": 140000
1905
- },
1906
- {
1907
- "epoch": 0.1,
1908
- "eval_loss": 0.3559122383594513,
1909
- "eval_runtime": 296.1288,
1910
- "eval_samples_per_second": 145.207,
1911
- "eval_steps_per_second": 2.269,
1912
- "step": 140000
1913
- },
1914
- {
1915
- "epoch": 0.1,
1916
- "learning_rate": 2.8453504277811132e-05,
1917
- "loss": 0.3876,
1918
- "step": 140500
1919
- },
1920
- {
1921
- "epoch": 0.1,
1922
- "learning_rate": 2.816740419162523e-05,
1923
- "loss": 0.3869,
1924
- "step": 141000
1925
- },
1926
- {
1927
- "epoch": 0.11,
1928
- "learning_rate": 2.7882983361931612e-05,
1929
- "loss": 0.3877,
1930
- "step": 141500
1931
- },
1932
- {
1933
- "epoch": 0.11,
1934
- "learning_rate": 2.760025933314676e-05,
1935
- "loss": 0.3862,
1936
- "step": 142000
1937
- },
1938
- {
1939
- "epoch": 0.11,
1940
- "learning_rate": 2.7320370146396765e-05,
1941
- "loss": 0.3872,
1942
- "step": 142500
1943
- },
1944
- {
1945
- "epoch": 0.12,
1946
- "learning_rate": 2.7041084972249825e-05,
1947
- "loss": 0.3865,
1948
- "step": 143000
1949
- },
1950
- {
1951
- "epoch": 0.12,
1952
- "learning_rate": 2.6763548531270056e-05,
1953
- "loss": 0.3866,
1954
- "step": 143500
1955
- },
1956
- {
1957
- "epoch": 0.12,
1958
- "learning_rate": 2.648777794321244e-05,
1959
- "loss": 0.3857,
1960
- "step": 144000
1961
- },
1962
- {
1963
- "epoch": 0.12,
1964
- "learning_rate": 2.621379021890586e-05,
1965
- "loss": 0.3857,
1966
- "step": 144500
1967
- },
1968
- {
1969
- "epoch": 0.12,
1970
- "learning_rate": 2.5941602259203728e-05,
1971
- "loss": 0.3873,
1972
- "step": 145000
1973
- },
1974
- {
1975
- "epoch": 0.12,
1976
- "eval_loss": 0.3540988564491272,
1977
- "eval_runtime": 283.9076,
1978
- "eval_samples_per_second": 151.458,
1979
- "eval_steps_per_second": 2.367,
1980
- "step": 145000
1981
- },
1982
- {
1983
- "epoch": 0.13,
1984
- "learning_rate": 2.5671230853941542e-05,
1985
- "loss": 0.3862,
1986
- "step": 145500
1987
- },
1988
- {
1989
- "epoch": 0.13,
1990
- "learning_rate": 2.5402692680901127e-05,
1991
- "loss": 0.3861,
1992
- "step": 146000
1993
- },
1994
- {
1995
- "epoch": 0.13,
1996
- "learning_rate": 2.5136004304781915e-05,
1997
- "loss": 0.3857,
1998
- "step": 146500
1999
- },
2000
- {
2001
- "epoch": 0.14,
2002
- "learning_rate": 2.487170994704443e-05,
2003
- "loss": 0.3851,
2004
- "step": 147000
2005
- },
2006
- {
2007
- "epoch": 0.14,
2008
- "learning_rate": 2.4608766620042728e-05,
2009
- "loss": 0.3852,
2010
- "step": 147500
2011
- },
2012
- {
2013
- "epoch": 0.14,
2014
- "learning_rate": 2.434772206306137e-05,
2015
- "loss": 0.3838,
2016
- "step": 148000
2017
- },
2018
- {
2019
- "epoch": 0.14,
2020
- "learning_rate": 2.4088592378558306e-05,
2021
- "loss": 0.3852,
2022
- "step": 148500
2023
- },
2024
- {
2025
- "epoch": 0.14,
2026
- "learning_rate": 2.3831393550873072e-05,
2027
- "loss": 0.3847,
2028
- "step": 149000
2029
- },
2030
- {
2031
- "epoch": 0.15,
2032
- "learning_rate": 2.3576649996138134e-05,
2033
- "loss": 0.3836,
2034
- "step": 149500
2035
- },
2036
- {
2037
- "epoch": 0.15,
2038
- "learning_rate": 2.332335641714342e-05,
2039
- "loss": 0.3842,
2040
- "step": 150000
2041
- },
2042
- {
2043
- "epoch": 0.15,
2044
- "eval_loss": 0.35215088725090027,
2045
- "eval_runtime": 263.7454,
2046
- "eval_samples_per_second": 163.036,
2047
- "eval_steps_per_second": 2.548,
2048
- "step": 150000
2049
- },
2050
- {
2051
- "epoch": 0.0,
2052
- "learning_rate": 2.3072541544934485e-05,
2053
- "loss": 0.3809,
2054
- "step": 150500
2055
- },
2056
- {
2057
- "epoch": 0.01,
2058
- "learning_rate": 2.2823215586103048e-05,
2059
- "loss": 0.3803,
2060
- "step": 151000
2061
- },
2062
- {
2063
- "epoch": 0.01,
2064
- "learning_rate": 2.2575898538490455e-05,
2065
- "loss": 0.3791,
2066
- "step": 151500
2067
- },
2068
- {
2069
- "epoch": 0.01,
2070
- "learning_rate": 2.2330605657777174e-05,
2071
- "loss": 0.3789,
2072
- "step": 152000
2073
- },
2074
- {
2075
- "epoch": 0.01,
2076
- "learning_rate": 2.2087352074783502e-05,
2077
- "loss": 0.3785,
2078
- "step": 152500
2079
- },
2080
- {
2081
- "epoch": 0.01,
2082
- "learning_rate": 2.1846152794536308e-05,
2083
- "loss": 0.3783,
2084
- "step": 153000
2085
- },
2086
- {
2087
- "epoch": 0.02,
2088
- "learning_rate": 2.1607022695343304e-05,
2089
- "loss": 0.3779,
2090
- "step": 153500
2091
- },
2092
- {
2093
- "epoch": 0.02,
2094
- "learning_rate": 2.1369976527875413e-05,
2095
- "loss": 0.3786,
2096
- "step": 154000
2097
- },
2098
- {
2099
- "epoch": 0.02,
2100
- "learning_rate": 2.1135496705472888e-05,
2101
- "loss": 0.3786,
2102
- "step": 154500
2103
- },
2104
- {
2105
- "epoch": 0.03,
2106
- "learning_rate": 2.090265789790893e-05,
2107
- "loss": 0.3777,
2108
- "step": 155000
2109
- },
2110
- {
2111
- "epoch": 0.03,
2112
- "eval_loss": 0.3511093854904175,
2113
- "eval_runtime": 298.7311,
2114
- "eval_samples_per_second": 143.942,
2115
- "eval_steps_per_second": 2.25,
2116
- "step": 155000
2117
- },
2118
- {
2119
- "epoch": 0.03,
2120
- "learning_rate": 2.067194647060863e-05,
2121
- "loss": 0.3785,
2122
- "step": 155500
2123
- },
2124
- {
2125
- "epoch": 0.03,
2126
- "learning_rate": 2.044337665493961e-05,
2127
- "loss": 0.3775,
2128
- "step": 156000
2129
- },
2130
- {
2131
- "epoch": 0.03,
2132
- "learning_rate": 2.0216962550164754e-05,
2133
- "loss": 0.376,
2134
- "step": 156500
2135
- },
2136
- {
2137
- "epoch": 0.04,
2138
- "learning_rate": 1.9993164436873545e-05,
2139
- "loss": 0.3785,
2140
- "step": 157000
2141
- },
2142
- {
2143
- "epoch": 0.04,
2144
- "learning_rate": 1.977109913818189e-05,
2145
- "loss": 0.3776,
2146
- "step": 157500
2147
- },
2148
- {
2149
- "epoch": 0.04,
2150
- "learning_rate": 1.9551231019628344e-05,
2151
- "loss": 0.3772,
2152
- "step": 158000
2153
- },
2154
- {
2155
- "epoch": 0.04,
2156
- "learning_rate": 1.933357364371412e-05,
2157
- "loss": 0.3792,
2158
- "step": 158500
2159
- },
2160
- {
2161
- "epoch": 0.04,
2162
- "learning_rate": 1.9118569074409627e-05,
2163
- "loss": 0.3773,
2164
- "step": 159000
2165
- },
2166
- {
2167
- "epoch": 0.05,
2168
- "learning_rate": 1.8905368836887298e-05,
2169
- "loss": 0.3781,
2170
- "step": 159500
2171
- },
2172
- {
2173
- "epoch": 0.05,
2174
- "learning_rate": 1.8694419181825997e-05,
2175
- "loss": 0.376,
2176
- "step": 160000
2177
- },
2178
- {
2179
- "epoch": 0.05,
2180
- "eval_loss": 0.3505874574184418,
2181
- "eval_runtime": 298.148,
2182
- "eval_samples_per_second": 144.224,
2183
- "eval_steps_per_second": 2.254,
2184
- "step": 160000
2185
- },
2186
- {
2187
- "epoch": 0.05,
2188
- "learning_rate": 1.848573312159415e-05,
2189
- "loss": 0.378,
2190
- "step": 160500
2191
- },
2192
- {
2193
- "epoch": 0.06,
2194
- "learning_rate": 1.8279734067715378e-05,
2195
- "loss": 0.3769,
2196
- "step": 161000
2197
- },
2198
- {
2199
- "epoch": 0.06,
2200
- "learning_rate": 1.8075609083923823e-05,
2201
- "loss": 0.3772,
2202
- "step": 161500
2203
- },
2204
- {
2205
- "epoch": 0.06,
2206
- "learning_rate": 1.7873785866083376e-05,
2207
- "loss": 0.3761,
2208
- "step": 162000
2209
- },
2210
- {
2211
- "epoch": 0.06,
2212
- "learning_rate": 1.7674276863600826e-05,
2213
- "loss": 0.3754,
2214
- "step": 162500
2215
- },
2216
- {
2217
- "epoch": 0.07,
2218
- "learning_rate": 1.747709438313117e-05,
2219
- "loss": 0.3761,
2220
- "step": 163000
2221
- },
2222
- {
2223
- "epoch": 0.07,
2224
- "learning_rate": 1.728225058781864e-05,
2225
- "loss": 0.3757,
2226
- "step": 163500
2227
- },
2228
- {
2229
- "epoch": 0.07,
2230
- "learning_rate": 1.7089757496546325e-05,
2231
- "loss": 0.3764,
2232
- "step": 164000
2233
- },
2234
- {
2235
- "epoch": 0.07,
2236
- "learning_rate": 1.6899626983194823e-05,
2237
- "loss": 0.3746,
2238
- "step": 164500
2239
- },
2240
- {
2241
- "epoch": 0.07,
2242
- "learning_rate": 1.6712243911044467e-05,
2243
- "loss": 0.375,
2244
- "step": 165000
2245
- },
2246
- {
2247
- "epoch": 0.07,
2248
- "eval_loss": 0.34934455156326294,
2249
- "eval_runtime": 286.3366,
2250
- "eval_samples_per_second": 150.173,
2251
- "eval_steps_per_second": 2.347,
2252
- "step": 165000
2253
- },
2254
- {
2255
- "epoch": 0.08,
2256
- "learning_rate": 1.6526868808276845e-05,
2257
- "loss": 0.3763,
2258
- "step": 165500
2259
- },
2260
- {
2261
- "epoch": 0.08,
2262
- "learning_rate": 1.6344254560630497e-05,
2263
- "loss": 0.3733,
2264
- "step": 166000
2265
- },
2266
- {
2267
- "epoch": 0.08,
2268
- "learning_rate": 1.616368051555008e-05,
2269
- "loss": 0.3749,
2270
- "step": 166500
2271
- },
2272
- {
2273
- "epoch": 0.09,
2274
- "learning_rate": 1.5985526173180873e-05,
2275
- "loss": 0.3755,
2276
- "step": 167000
2277
- },
2278
- {
2279
- "epoch": 0.09,
2280
- "learning_rate": 1.580980252292188e-05,
2281
- "loss": 0.3754,
2282
- "step": 167500
2283
- },
2284
- {
2285
- "epoch": 0.09,
2286
- "learning_rate": 1.563652040423561e-05,
2287
- "loss": 0.3747,
2288
- "step": 168000
2289
- },
2290
- {
2291
- "epoch": 0.09,
2292
- "learning_rate": 1.546569050597935e-05,
2293
- "loss": 0.3758,
2294
- "step": 168500
2295
- },
2296
- {
2297
- "epoch": 0.1,
2298
- "learning_rate": 1.5297657635265495e-05,
2299
- "loss": 0.3756,
2300
- "step": 169000
2301
- },
2302
- {
2303
- "epoch": 0.1,
2304
- "learning_rate": 1.5131758682182151e-05,
2305
- "loss": 0.375,
2306
- "step": 169500
2307
- },
2308
- {
2309
- "epoch": 0.1,
2310
- "learning_rate": 1.49683430856095e-05,
2311
- "loss": 0.3759,
2312
- "step": 170000
2313
- },
2314
- {
2315
- "epoch": 0.1,
2316
- "eval_loss": 0.3487904369831085,
2317
- "eval_runtime": 298.7435,
2318
- "eval_samples_per_second": 143.936,
2319
- "eval_steps_per_second": 2.249,
2320
- "step": 170000
2321
- },
2322
- {
2323
- "epoch": 0.1,
2324
- "learning_rate": 1.4807420925791258e-05,
2325
- "loss": 0.3746,
2326
- "step": 170500
2327
- },
2328
- {
2329
- "epoch": 0.1,
2330
- "learning_rate": 1.4649002129164283e-05,
2331
- "loss": 0.3739,
2332
- "step": 171000
2333
- },
2334
- {
2335
- "epoch": 0.11,
2336
- "learning_rate": 1.449309646774616e-05,
2337
- "loss": 0.3747,
2338
- "step": 171500
2339
- },
2340
- {
2341
- "epoch": 0.11,
2342
- "learning_rate": 1.4340017800329543e-05,
2343
- "loss": 0.3735,
2344
- "step": 172000
2345
- },
2346
- {
2347
- "epoch": 0.11,
2348
- "learning_rate": 1.4189162030934715e-05,
2349
- "loss": 0.3744,
2350
- "step": 172500
2351
- },
2352
- {
2353
- "epoch": 0.12,
2354
- "learning_rate": 1.4040847761852026e-05,
2355
- "loss": 0.3739,
2356
- "step": 173000
2357
- },
2358
- {
2359
- "epoch": 0.12,
2360
- "learning_rate": 1.3895084141804244e-05,
2361
- "loss": 0.374,
2362
- "step": 173500
2363
- },
2364
- {
2365
- "epoch": 0.12,
2366
- "learning_rate": 1.3751880162178036e-05,
2367
- "loss": 0.3731,
2368
- "step": 174000
2369
- },
2370
- {
2371
- "epoch": 0.12,
2372
- "learning_rate": 1.3611523358351411e-05,
2373
- "loss": 0.3732,
2374
- "step": 174500
2375
- },
2376
- {
2377
- "epoch": 0.12,
2378
- "learning_rate": 1.3473459838770744e-05,
2379
- "loss": 0.3751,
2380
- "step": 175000
2381
- },
2382
- {
2383
- "epoch": 0.12,
2384
- "eval_loss": 0.3473358154296875,
2385
- "eval_runtime": 291.1365,
2386
- "eval_samples_per_second": 147.697,
2387
- "eval_steps_per_second": 2.308,
2388
- "step": 175000
2389
- },
2390
- {
2391
- "epoch": 0.13,
2392
- "learning_rate": 1.3337981967384716e-05,
2393
- "loss": 0.3742,
2394
- "step": 175500
2395
- },
2396
- {
2397
- "epoch": 0.13,
2398
- "learning_rate": 1.3205098101106558e-05,
2399
- "loss": 0.374,
2400
- "step": 176000
2401
- },
2402
- {
2403
- "epoch": 0.13,
2404
- "learning_rate": 1.3074816436839109e-05,
2405
- "loss": 0.3738,
2406
- "step": 176500
2407
- },
2408
- {
2409
- "epoch": 0.14,
2410
- "learning_rate": 1.2947145010969087e-05,
2411
- "loss": 0.3735,
2412
- "step": 177000
2413
- },
2414
- {
2415
- "epoch": 0.14,
2416
- "learning_rate": 1.2822091698871432e-05,
2417
- "loss": 0.3736,
2418
- "step": 177500
2419
- },
2420
- {
2421
- "epoch": 0.14,
2422
- "learning_rate": 1.2699906443769858e-05,
2423
- "loss": 0.3721,
2424
- "step": 178000
2425
- },
2426
- {
2427
- "epoch": 0.14,
2428
- "learning_rate": 1.2580107064687531e-05,
2429
- "loss": 0.3737,
2430
- "step": 178500
2431
- },
2432
- {
2433
- "epoch": 0.14,
2434
- "learning_rate": 1.2462948440006997e-05,
2435
- "loss": 0.3731,
2436
- "step": 179000
2437
- },
2438
- {
2439
- "epoch": 0.15,
2440
- "learning_rate": 1.2348437796624094e-05,
2441
- "loss": 0.3722,
2442
- "step": 179500
2443
- },
2444
- {
2445
- "epoch": 0.15,
2446
- "learning_rate": 1.2236582198094697e-05,
2447
- "loss": 0.3728,
2448
- "step": 180000
2449
- },
2450
- {
2451
- "epoch": 0.15,
2452
- "eval_loss": 0.34677574038505554,
2453
- "eval_runtime": 288.6592,
2454
- "eval_samples_per_second": 148.965,
2455
- "eval_steps_per_second": 2.328,
2456
- "step": 180000
2457
- },
2458
- {
2459
- "epoch": 0.15,
2460
- "learning_rate": 1.2127388544199013e-05,
2461
- "loss": 0.378,
2462
- "step": 180500
2463
- },
2464
- {
2465
- "epoch": 0.15,
2466
- "learning_rate": 1.2020863570515961e-05,
2467
- "loss": 0.3783,
2468
- "step": 181000
2469
- },
2470
- {
2471
- "epoch": 0.16,
2472
- "learning_rate": 1.1917218873266704e-05,
2473
- "loss": 0.3774,
2474
- "step": 181500
2475
- },
2476
- {
2477
- "epoch": 0.16,
2478
- "learning_rate": 1.1816245104688946e-05,
2479
- "loss": 0.3768,
2480
- "step": 182000
2481
- },
2482
- {
2483
- "epoch": 0.16,
2484
- "learning_rate": 1.1717754173131136e-05,
2485
- "loss": 0.378,
2486
- "step": 182500
2487
- },
2488
- {
2489
- "epoch": 0.17,
2490
- "learning_rate": 1.162195718996353e-05,
2491
- "loss": 0.3775,
2492
- "step": 183000
2493
- },
2494
- {
2495
- "epoch": 0.17,
2496
- "learning_rate": 1.1528860064395268e-05,
2497
- "loss": 0.3778,
2498
- "step": 183500
2499
- },
2500
- {
2501
- "epoch": 0.17,
2502
- "learning_rate": 1.14384685390956e-05,
2503
- "loss": 0.377,
2504
- "step": 184000
2505
- },
2506
- {
2507
- "epoch": 0.17,
2508
- "learning_rate": 1.1350788189839584e-05,
2509
- "loss": 0.3769,
2510
- "step": 184500
2511
- },
2512
- {
2513
- "epoch": 0.17,
2514
- "learning_rate": 1.126582442516417e-05,
2515
- "loss": 0.3779,
2516
- "step": 185000
2517
- },
2518
- {
2519
- "epoch": 0.17,
2520
- "eval_loss": 0.3469138443470001,
2521
- "eval_runtime": 287.4474,
2522
- "eval_samples_per_second": 149.593,
2523
- "eval_steps_per_second": 2.338,
2524
- "step": 185000
2525
- },
2526
- {
2527
- "epoch": 0.18,
2528
- "learning_rate": 1.1183582486034581e-05,
2529
- "loss": 0.3766,
2530
- "step": 185500
2531
- },
2532
- {
2533
- "epoch": 0.18,
2534
- "learning_rate": 1.1104067445521018e-05,
2535
- "loss": 0.3776,
2536
- "step": 186000
2537
- },
2538
- {
2539
- "epoch": 0.18,
2540
- "learning_rate": 1.102728420848572e-05,
2541
- "loss": 0.3772,
2542
- "step": 186500
2543
- },
2544
- {
2545
- "epoch": 0.18,
2546
- "learning_rate": 1.0953237511280449e-05,
2547
- "loss": 0.3769,
2548
- "step": 187000
2549
- },
2550
- {
2551
- "epoch": 0.19,
2552
- "learning_rate": 1.0881931921454253e-05,
2553
- "loss": 0.3776,
2554
- "step": 187500
2555
- },
2556
- {
2557
- "epoch": 0.19,
2558
- "learning_rate": 1.0813506214785774e-05,
2559
- "loss": 0.3769,
2560
- "step": 188000
2561
- },
2562
- {
2563
- "epoch": 0.19,
2564
- "learning_rate": 1.0747690362178142e-05,
2565
- "loss": 0.377,
2566
- "step": 188500
2567
- },
2568
- {
2569
- "epoch": 0.2,
2570
- "learning_rate": 1.0684628296065977e-05,
2571
- "loss": 0.3765,
2572
- "step": 189000
2573
- },
2574
- {
2575
- "epoch": 0.2,
2576
- "learning_rate": 1.0624323906414552e-05,
2577
- "loss": 0.376,
2578
- "step": 189500
2579
- },
2580
- {
2581
- "epoch": 0.2,
2582
- "learning_rate": 1.0566780913082688e-05,
2583
- "loss": 0.3777,
2584
- "step": 190000
2585
- },
2586
- {
2587
- "epoch": 0.2,
2588
- "eval_loss": 0.34515419602394104,
2589
- "eval_runtime": 275.3559,
2590
- "eval_samples_per_second": 156.162,
2591
- "eval_steps_per_second": 2.44,
2592
- "step": 190000
2593
- },
2594
- {
2595
- "epoch": 0.0,
2596
- "learning_rate": 1.0512109659997981e-05,
2597
- "loss": 0.3699,
2598
- "step": 190500
2599
- },
2600
- {
2601
- "epoch": 0.01,
2602
- "learning_rate": 1.0460195662993147e-05,
2603
- "loss": 0.3694,
2604
- "step": 191000
2605
- },
2606
- {
2607
- "epoch": 0.01,
2608
- "learning_rate": 1.0410946381032989e-05,
2609
- "loss": 0.3682,
2610
- "step": 191500
2611
- },
2612
- {
2613
- "epoch": 0.01,
2614
- "learning_rate": 1.036447165752325e-05,
2615
- "loss": 0.3681,
2616
- "step": 192000
2617
- },
2618
- {
2619
- "epoch": 0.01,
2620
- "learning_rate": 1.03207743592438e-05,
2621
- "loss": 0.368,
2622
- "step": 192500
2623
- },
2624
- {
2625
- "epoch": 0.01,
2626
- "learning_rate": 1.0279857181649817e-05,
2627
- "loss": 0.3678,
2628
- "step": 193000
2629
- },
2630
- {
2631
- "epoch": 0.02,
2632
- "learning_rate": 1.0241722648705564e-05,
2633
- "loss": 0.3673,
2634
- "step": 193500
2635
- },
2636
- {
2637
- "epoch": 0.02,
2638
- "learning_rate": 1.0206373112728653e-05,
2639
- "loss": 0.3683,
2640
- "step": 194000
2641
- },
2642
- {
2643
- "epoch": 0.02,
2644
- "learning_rate": 1.0173810754244984e-05,
2645
- "loss": 0.3686,
2646
- "step": 194500
2647
- },
2648
- {
2649
- "epoch": 0.03,
2650
- "learning_rate": 1.0144037581854202e-05,
2651
- "loss": 0.3678,
2652
- "step": 195000
2653
- },
2654
- {
2655
- "epoch": 0.03,
2656
- "eval_loss": 0.34533271193504333,
2657
- "eval_runtime": 293.19,
2658
- "eval_samples_per_second": 146.663,
2659
- "eval_steps_per_second": 2.292,
2660
- "step": 195000
2661
- },
2662
- {
2663
- "epoch": 0.03,
2664
- "learning_rate": 1.0117055432105827e-05,
2665
- "loss": 0.3686,
2666
- "step": 195500
2667
- },
2668
- {
2669
- "epoch": 0.03,
2670
- "learning_rate": 1.0092865969385957e-05,
2671
- "loss": 0.3676,
2672
- "step": 196000
2673
- },
2674
- {
2675
- "epoch": 0.03,
2676
- "learning_rate": 1.0071470685814586e-05,
2677
- "loss": 0.3662,
2678
- "step": 196500
2679
- },
2680
- {
2681
- "epoch": 0.04,
2682
- "learning_rate": 1.0052905310023697e-05,
2683
- "loss": 0.3689,
2684
- "step": 197000
2685
- },
2686
- {
2687
- "epoch": 0.04,
2688
- "learning_rate": 1.0037096577272572e-05,
2689
- "loss": 0.368,
2690
- "step": 197500
2691
- },
2692
- {
2693
- "epoch": 0.04,
2694
- "learning_rate": 1.0024085463788748e-05,
2695
- "loss": 0.3678,
2696
- "step": 198000
2697
- },
2698
- {
2699
- "epoch": 0.04,
2700
- "learning_rate": 1.0013872772159007e-05,
2701
- "loss": 0.37,
2702
- "step": 198500
2703
- },
2704
- {
2705
- "epoch": 0.04,
2706
- "learning_rate": 1.0006471165843453e-05,
2707
- "loss": 0.3681,
2708
- "step": 199000
2709
- },
2710
- {
2711
- "epoch": 0.05,
2712
- "learning_rate": 1.000185143580406e-05,
2713
- "loss": 0.369,
2714
- "step": 199500
2715
- },
2716
- {
2717
- "epoch": 0.05,
2718
- "learning_rate": 1.0000031499118314e-05,
2719
- "loss": 0.3671,
2720
- "step": 200000
2721
- },
2722
- {
2723
- "epoch": 0.05,
2724
- "eval_loss": 0.3454923629760742,
2725
- "eval_runtime": 277.6346,
2726
- "eval_samples_per_second": 154.88,
2727
- "eval_steps_per_second": 2.42,
2728
- "step": 200000
2729
- }
2730
- ],
2731
- "max_steps": 200000,
2732
- "num_train_epochs": 9223372036854775807,
2733
- "total_flos": 4.709861347295232e+21,
2734
- "trial_name": null,
2735
- "trial_params": null
2736
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d97d297980a836fa96a55b93de6b63b6bdd01f5d972c55cfc32c86f75c5c6b99
3
- size 5551