Nadav committed on
Commit
d3124d3
1 Parent(s): 1f2bdc9

Training in progress, step 200000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd03613df05982cc6cd8521404bf2d7d311a82ab0ee46fc664ebdeffd43ec5fb
+oid sha256:3278127396f5d3f8558d8e933cd802503bc96eb286ac4ab8615fdd148bdb7c65
 size 893439185
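
Each of the checkpoint entries in this commit is a Git LFS pointer file rather than the binary itself: "oid sha256:" is the SHA-256 digest of the actual blob and "size" is its length in bytes. A minimal sketch (hypothetical local paths, not part of this commit) for checking that a downloaded checkpoint file matches its pointer:

    import hashlib
    import os

    def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
        """Check a downloaded blob against its Git LFS pointer (oid + size)."""
        fields = {}
        with open(pointer_path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                if key:
                    fields[key] = value

        expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
        expected_size = int(fields["size"])

        digest = hashlib.sha256()
        with open(blob_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
                digest.update(chunk)

        return digest.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

    # Hypothetical filenames: the pointer is what this diff shows, the blob is the
    # real optimizer state fetched via LFS.
    print(verify_lfs_pointer("optimizer.pt.pointer", "optimizer.pt"))
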
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75854e0ff3e7c4405dc53eac04c2010a206af7aae27dae0d9ee35db9ad0a959a
+oid sha256:f55e8b51abb099c47f1d26ef4448005ca8276d7a11a4fd49a19180fc80b2825a
 size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed3586f2d8b7a9d0704645682c4f2d417639e4cca27eecf545ccb9e56c8d74df
+oid sha256:88004fc539503ca7b97859aa1d0c5a82fcb7f351b44fb5ff1b5865391c2b3cde
 size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38e985eb8bf02ef58974d91bc1d920b2617a41af091b03e6ddbcd3b7548fe4b3
+oid sha256:ccdf82dc05f3b4b6efaa6b42846dbff856f1303f57b4b6c56f7597dda3131a18
 size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d738b37d6429a4b318ddcdaacb6b35096cf2474500c27a66a5a92064653d6fd
+oid sha256:de21128fb4d2d9fbd6335f650a62e3e1299cfe449b8f64957937f253cda36cc0
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2,
-  "global_step": 190000,
+  "epoch": 0.05,
+  "global_step": 200000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2590,11 +2590,147 @@
       "eval_samples_per_second": 156.162,
       "eval_steps_per_second": 2.44,
       "step": 190000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.0512109659997981e-05,
+      "loss": 0.3699,
+      "step": 190500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.0460195662993147e-05,
+      "loss": 0.3694,
+      "step": 191000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.0410946381032989e-05,
+      "loss": 0.3682,
+      "step": 191500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.036447165752325e-05,
+      "loss": 0.3681,
+      "step": 192000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.03207743592438e-05,
+      "loss": 0.368,
+      "step": 192500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.0279857181649817e-05,
+      "loss": 0.3678,
+      "step": 193000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.0241722648705564e-05,
+      "loss": 0.3673,
+      "step": 193500
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.0206373112728653e-05,
+      "loss": 0.3683,
+      "step": 194000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.0173810754244984e-05,
+      "loss": 0.3686,
+      "step": 194500
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 1.0144037581854202e-05,
+      "loss": 0.3678,
+      "step": 195000
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 0.34533271193504333,
+      "eval_runtime": 293.19,
+      "eval_samples_per_second": 146.663,
+      "eval_steps_per_second": 2.292,
+      "step": 195000
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 1.0117055432105827e-05,
+      "loss": 0.3686,
+      "step": 195500
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 1.0092865969385957e-05,
+      "loss": 0.3676,
+      "step": 196000
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 1.0071470685814586e-05,
+      "loss": 0.3662,
+      "step": 196500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.0052905310023697e-05,
+      "loss": 0.3689,
+      "step": 197000
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.0037096577272572e-05,
+      "loss": 0.368,
+      "step": 197500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.0024085463788748e-05,
+      "loss": 0.3678,
+      "step": 198000
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.0013872772159007e-05,
+      "loss": 0.37,
+      "step": 198500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.0006471165843453e-05,
+      "loss": 0.3681,
+      "step": 199000
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 1.000185143580406e-05,
+      "loss": 0.369,
+      "step": 199500
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 1.0000031499118314e-05,
+      "loss": 0.3671,
+      "step": 200000
+    },
+    {
+      "epoch": 0.05,
+      "eval_loss": 0.3454923629760742,
+      "eval_runtime": 277.6346,
+      "eval_samples_per_second": 154.88,
+      "eval_steps_per_second": 2.42,
+      "step": 200000
     }
   ],
   "max_steps": 200000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 4.4743682799304704e+21,
+  "total_flos": 4.709861347295232e+21,
   "trial_name": null,
   "trial_params": null
 }
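
The trainer_state.json diff above appends the training and evaluation log entries for steps 190500 through 200000 and updates the checkpoint's progress counters. Assuming the appended list is the Trainer's log_history field (its opening key sits before line 2590 and is not shown in this hunk), a short sketch for inspecting the checkpoint state and the tail of the loss curve from a local copy:

    import json

    # Hypothetical local copy of the file shown in the diff above.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["global_step"], state["max_steps"], state["total_flos"])

    # Training logs carry "loss"/"learning_rate"; evaluation logs carry "eval_loss".
    history = state.get("log_history", [])
    train_logs = [e for e in history if "loss" in e and "eval_loss" not in e]
    eval_logs = [e for e in history if "eval_loss" in e]

    for entry in train_logs[-3:]:
        print(entry["step"], entry["loss"], entry["learning_rate"])
    if eval_logs:
        last_eval = eval_logs[-1]
        print(last_eval["step"], last_eval["eval_loss"], last_eval["eval_samples_per_second"])
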
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78c8d520d84739d0748a1ea61094afdc642629ce6412053854db2541abbd8d18
+oid sha256:d97d297980a836fa96a55b93de6b63b6bdd01f5d972c55cfc32c86f75c5c6b99
 size 5551
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75854e0ff3e7c4405dc53eac04c2010a206af7aae27dae0d9ee35db9ad0a959a
+oid sha256:f55e8b51abb099c47f1d26ef4448005ca8276d7a11a4fd49a19180fc80b2825a
 size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78c8d520d84739d0748a1ea61094afdc642629ce6412053854db2541abbd8d18
+oid sha256:d97d297980a836fa96a55b93de6b63b6bdd01f5d972c55cfc32c86f75c5c6b99
 size 5551