Nadav commited on
Commit
1f2bdc9
1 Parent(s): a913fa2

Training in progress, step 190000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17a7e34d71622ed2729105fb01520301222c71cea562f151f7179cc6bc4b9b50
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd03613df05982cc6cd8521404bf2d7d311a82ab0ee46fc664ebdeffd43ec5fb
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30751dbb61ea71a9ce0a93f55b70e1a1196dfc0d7905f81e666b241fe83ddb77
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75854e0ff3e7c4405dc53eac04c2010a206af7aae27dae0d9ee35db9ad0a959a
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75d55dc4383f97263c7bbe8d3a50efcb2e3d7e3cace3b3db13ee7da25b48c2b4
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed3586f2d8b7a9d0704645682c4f2d417639e4cca27eecf545ccb9e56c8d74df
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f3b1810a033427b234e2bffc73d00c710ede62d1be2ca4c7afaaf9e382d34f3
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38e985eb8bf02ef58974d91bc1d920b2617a41af091b03e6ddbcd3b7548fe4b3
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52c10056239563f1c1ae3a50f3e866a02915fb473e9e6b2140de76aa33912655
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d738b37d6429a4b318ddcdaacb6b35096cf2474500c27a66a5a92064653d6fd
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.15,
5
- "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2454,11 +2454,147 @@
2454
  "eval_samples_per_second": 148.965,
2455
  "eval_steps_per_second": 2.328,
2456
  "step": 180000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2457
  }
2458
  ],
2459
  "max_steps": 200000,
2460
  "num_train_epochs": 9223372036854775807,
2461
- "total_flos": 4.238875212565709e+21,
2462
  "trial_name": null,
2463
  "trial_params": null
2464
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2,
5
+ "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2454
  "eval_samples_per_second": 148.965,
2455
  "eval_steps_per_second": 2.328,
2456
  "step": 180000
2457
+ },
2458
+ {
2459
+ "epoch": 0.15,
2460
+ "learning_rate": 1.2127388544199013e-05,
2461
+ "loss": 0.378,
2462
+ "step": 180500
2463
+ },
2464
+ {
2465
+ "epoch": 0.15,
2466
+ "learning_rate": 1.2020863570515961e-05,
2467
+ "loss": 0.3783,
2468
+ "step": 181000
2469
+ },
2470
+ {
2471
+ "epoch": 0.16,
2472
+ "learning_rate": 1.1917218873266704e-05,
2473
+ "loss": 0.3774,
2474
+ "step": 181500
2475
+ },
2476
+ {
2477
+ "epoch": 0.16,
2478
+ "learning_rate": 1.1816245104688946e-05,
2479
+ "loss": 0.3768,
2480
+ "step": 182000
2481
+ },
2482
+ {
2483
+ "epoch": 0.16,
2484
+ "learning_rate": 1.1717754173131136e-05,
2485
+ "loss": 0.378,
2486
+ "step": 182500
2487
+ },
2488
+ {
2489
+ "epoch": 0.17,
2490
+ "learning_rate": 1.162195718996353e-05,
2491
+ "loss": 0.3775,
2492
+ "step": 183000
2493
+ },
2494
+ {
2495
+ "epoch": 0.17,
2496
+ "learning_rate": 1.1528860064395268e-05,
2497
+ "loss": 0.3778,
2498
+ "step": 183500
2499
+ },
2500
+ {
2501
+ "epoch": 0.17,
2502
+ "learning_rate": 1.14384685390956e-05,
2503
+ "loss": 0.377,
2504
+ "step": 184000
2505
+ },
2506
+ {
2507
+ "epoch": 0.17,
2508
+ "learning_rate": 1.1350788189839584e-05,
2509
+ "loss": 0.3769,
2510
+ "step": 184500
2511
+ },
2512
+ {
2513
+ "epoch": 0.17,
2514
+ "learning_rate": 1.126582442516417e-05,
2515
+ "loss": 0.3779,
2516
+ "step": 185000
2517
+ },
2518
+ {
2519
+ "epoch": 0.17,
2520
+ "eval_loss": 0.3469138443470001,
2521
+ "eval_runtime": 287.4474,
2522
+ "eval_samples_per_second": 149.593,
2523
+ "eval_steps_per_second": 2.338,
2524
+ "step": 185000
2525
+ },
2526
+ {
2527
+ "epoch": 0.18,
2528
+ "learning_rate": 1.1183582486034581e-05,
2529
+ "loss": 0.3766,
2530
+ "step": 185500
2531
+ },
2532
+ {
2533
+ "epoch": 0.18,
2534
+ "learning_rate": 1.1104067445521018e-05,
2535
+ "loss": 0.3776,
2536
+ "step": 186000
2537
+ },
2538
+ {
2539
+ "epoch": 0.18,
2540
+ "learning_rate": 1.102728420848572e-05,
2541
+ "loss": 0.3772,
2542
+ "step": 186500
2543
+ },
2544
+ {
2545
+ "epoch": 0.18,
2546
+ "learning_rate": 1.0953237511280449e-05,
2547
+ "loss": 0.3769,
2548
+ "step": 187000
2549
+ },
2550
+ {
2551
+ "epoch": 0.19,
2552
+ "learning_rate": 1.0881931921454253e-05,
2553
+ "loss": 0.3776,
2554
+ "step": 187500
2555
+ },
2556
+ {
2557
+ "epoch": 0.19,
2558
+ "learning_rate": 1.0813506214785774e-05,
2559
+ "loss": 0.3769,
2560
+ "step": 188000
2561
+ },
2562
+ {
2563
+ "epoch": 0.19,
2564
+ "learning_rate": 1.0747690362178142e-05,
2565
+ "loss": 0.377,
2566
+ "step": 188500
2567
+ },
2568
+ {
2569
+ "epoch": 0.2,
2570
+ "learning_rate": 1.0684628296065977e-05,
2571
+ "loss": 0.3765,
2572
+ "step": 189000
2573
+ },
2574
+ {
2575
+ "epoch": 0.2,
2576
+ "learning_rate": 1.0624323906414552e-05,
2577
+ "loss": 0.376,
2578
+ "step": 189500
2579
+ },
2580
+ {
2581
+ "epoch": 0.2,
2582
+ "learning_rate": 1.0566780913082688e-05,
2583
+ "loss": 0.3777,
2584
+ "step": 190000
2585
+ },
2586
+ {
2587
+ "epoch": 0.2,
2588
+ "eval_loss": 0.34515419602394104,
2589
+ "eval_runtime": 275.3559,
2590
+ "eval_samples_per_second": 156.162,
2591
+ "eval_steps_per_second": 2.44,
2592
+ "step": 190000
2593
  }
2594
  ],
2595
  "max_steps": 200000,
2596
  "num_train_epochs": 9223372036854775807,
2597
+ "total_flos": 4.4743682799304704e+21,
2598
  "trial_name": null,
2599
  "trial_params": null
2600
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30751dbb61ea71a9ce0a93f55b70e1a1196dfc0d7905f81e666b241fe83ddb77
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75854e0ff3e7c4405dc53eac04c2010a206af7aae27dae0d9ee35db9ad0a959a
3
  size 449471589