Nadav commited on
Commit
a913fa2
1 Parent(s): 732e9fe

Training in progress, step 180000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3edb0adadd334561a36e43a4212a4e9514b0b74e249ccb77a9f1a398e42f35
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a7e34d71622ed2729105fb01520301222c71cea562f151f7179cc6bc4b9b50
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f22bc3eca0ebb0e03246c1e1c410976b12328cb4244ff48da993896225bf4a8
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30751dbb61ea71a9ce0a93f55b70e1a1196dfc0d7905f81e666b241fe83ddb77
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c26ad211585233ad2dccb9325976cb7e71ced6080ea69e9468119115e0c0050
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d55dc4383f97263c7bbe8d3a50efcb2e3d7e3cace3b3db13ee7da25b48c2b4
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ae036049be18d3db0b069d7cdee2136b0e955af29d02ba238e8d63ec3de1819
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3b1810a033427b234e2bffc73d00c710ede62d1be2ca4c7afaaf9e382d34f3
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f97d2f076eca9f1526fcbf8cced9c57bd3414f3b9c7f6f76f9126f17c819f1a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52c10056239563f1c1ae3a50f3e866a02915fb473e9e6b2140de76aa33912655
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1,
5
- "global_step": 170000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2318,11 +2318,147 @@
2318
  "eval_samples_per_second": 143.936,
2319
  "eval_steps_per_second": 2.249,
2320
  "step": 170000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2321
  }
2322
  ],
2323
  "max_steps": 200000,
2324
  "num_train_epochs": 9223372036854775807,
2325
- "total_flos": 4.003382145200947e+21,
2326
  "trial_name": null,
2327
  "trial_params": null
2328
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.15,
5
+ "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2318
  "eval_samples_per_second": 143.936,
2319
  "eval_steps_per_second": 2.249,
2320
  "step": 170000
2321
+ },
2322
+ {
2323
+ "epoch": 0.1,
2324
+ "learning_rate": 1.4807420925791258e-05,
2325
+ "loss": 0.3746,
2326
+ "step": 170500
2327
+ },
2328
+ {
2329
+ "epoch": 0.1,
2330
+ "learning_rate": 1.4649002129164283e-05,
2331
+ "loss": 0.3739,
2332
+ "step": 171000
2333
+ },
2334
+ {
2335
+ "epoch": 0.11,
2336
+ "learning_rate": 1.449309646774616e-05,
2337
+ "loss": 0.3747,
2338
+ "step": 171500
2339
+ },
2340
+ {
2341
+ "epoch": 0.11,
2342
+ "learning_rate": 1.4340017800329543e-05,
2343
+ "loss": 0.3735,
2344
+ "step": 172000
2345
+ },
2346
+ {
2347
+ "epoch": 0.11,
2348
+ "learning_rate": 1.4189162030934715e-05,
2349
+ "loss": 0.3744,
2350
+ "step": 172500
2351
+ },
2352
+ {
2353
+ "epoch": 0.12,
2354
+ "learning_rate": 1.4040847761852026e-05,
2355
+ "loss": 0.3739,
2356
+ "step": 173000
2357
+ },
2358
+ {
2359
+ "epoch": 0.12,
2360
+ "learning_rate": 1.3895084141804244e-05,
2361
+ "loss": 0.374,
2362
+ "step": 173500
2363
+ },
2364
+ {
2365
+ "epoch": 0.12,
2366
+ "learning_rate": 1.3751880162178036e-05,
2367
+ "loss": 0.3731,
2368
+ "step": 174000
2369
+ },
2370
+ {
2371
+ "epoch": 0.12,
2372
+ "learning_rate": 1.3611523358351411e-05,
2373
+ "loss": 0.3732,
2374
+ "step": 174500
2375
+ },
2376
+ {
2377
+ "epoch": 0.12,
2378
+ "learning_rate": 1.3473459838770744e-05,
2379
+ "loss": 0.3751,
2380
+ "step": 175000
2381
+ },
2382
+ {
2383
+ "epoch": 0.12,
2384
+ "eval_loss": 0.3473358154296875,
2385
+ "eval_runtime": 291.1365,
2386
+ "eval_samples_per_second": 147.697,
2387
+ "eval_steps_per_second": 2.308,
2388
+ "step": 175000
2389
+ },
2390
+ {
2391
+ "epoch": 0.13,
2392
+ "learning_rate": 1.3337981967384716e-05,
2393
+ "loss": 0.3742,
2394
+ "step": 175500
2395
+ },
2396
+ {
2397
+ "epoch": 0.13,
2398
+ "learning_rate": 1.3205098101106558e-05,
2399
+ "loss": 0.374,
2400
+ "step": 176000
2401
+ },
2402
+ {
2403
+ "epoch": 0.13,
2404
+ "learning_rate": 1.3074816436839109e-05,
2405
+ "loss": 0.3738,
2406
+ "step": 176500
2407
+ },
2408
+ {
2409
+ "epoch": 0.14,
2410
+ "learning_rate": 1.2947145010969087e-05,
2411
+ "loss": 0.3735,
2412
+ "step": 177000
2413
+ },
2414
+ {
2415
+ "epoch": 0.14,
2416
+ "learning_rate": 1.2822091698871432e-05,
2417
+ "loss": 0.3736,
2418
+ "step": 177500
2419
+ },
2420
+ {
2421
+ "epoch": 0.14,
2422
+ "learning_rate": 1.2699906443769858e-05,
2423
+ "loss": 0.3721,
2424
+ "step": 178000
2425
+ },
2426
+ {
2427
+ "epoch": 0.14,
2428
+ "learning_rate": 1.2580107064687531e-05,
2429
+ "loss": 0.3737,
2430
+ "step": 178500
2431
+ },
2432
+ {
2433
+ "epoch": 0.14,
2434
+ "learning_rate": 1.2462948440006997e-05,
2435
+ "loss": 0.3731,
2436
+ "step": 179000
2437
+ },
2438
+ {
2439
+ "epoch": 0.15,
2440
+ "learning_rate": 1.2348437796624094e-05,
2441
+ "loss": 0.3722,
2442
+ "step": 179500
2443
+ },
2444
+ {
2445
+ "epoch": 0.15,
2446
+ "learning_rate": 1.2236582198094697e-05,
2447
+ "loss": 0.3728,
2448
+ "step": 180000
2449
+ },
2450
+ {
2451
+ "epoch": 0.15,
2452
+ "eval_loss": 0.34677574038505554,
2453
+ "eval_runtime": 288.6592,
2454
+ "eval_samples_per_second": 148.965,
2455
+ "eval_steps_per_second": 2.328,
2456
+ "step": 180000
2457
  }
2458
  ],
2459
  "max_steps": 200000,
2460
  "num_train_epochs": 9223372036854775807,
2461
+ "total_flos": 4.238875212565709e+21,
2462
  "trial_name": null,
2463
  "trial_params": null
2464
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f22bc3eca0ebb0e03246c1e1c410976b12328cb4244ff48da993896225bf4a8
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30751dbb61ea71a9ce0a93f55b70e1a1196dfc0d7905f81e666b241fe83ddb77
3
  size 449471589