Nadav commited on
Commit
b43d565
1 Parent(s): b840eb5

Training in progress, step 130000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f843452dd2d44fe58fdd825dcafecbd5ea028e6c09e0d8a6b3384e7f95e1f395
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90898868bd889ea276f797a53d8b665e23905ceffbb0c399cdd9fdf53208357f
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:547e3f788d184cfbb69d289ec15d38e2ddf96173289926ade07f5513af53666d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e09bdae8806b91fbc10b97bb78e984559d5843803f34f83ae13bec03f9aedd
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:774912b9aaf57566d96930e8649a92e3d3c64af7753e13704dd13624abab5efb
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88004fc539503ca7b97859aa1d0c5a82fcb7f351b44fb5ff1b5865391c2b3cde
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c30a234f84ea7205c00b70e3d45f309d75cec5f3be3db88621b9b0b68f3199c5
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c66d64016310bb2663dcec6c225164473113a433baa255e11fdf3b0cbfe9cd7
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50ed9c97014449949451103ed83d187bd2f30103cb5d95bcdb03749d65dbc585
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5e5f4eb24c3d48634235c80df17254ed52671e30811d1041e058ad2570720f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1638,11 +1638,147 @@
1638
  "eval_samples_per_second": 147.554,
1639
  "eval_steps_per_second": 2.306,
1640
  "step": 120000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1641
  }
1642
  ],
1643
  "max_steps": 200000,
1644
  "num_train_epochs": 9223372036854775807,
1645
- "total_flos": 2.825916808377139e+21,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05,
5
+ "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1638
  "eval_samples_per_second": 147.554,
1639
  "eval_steps_per_second": 2.306,
1640
  "step": 120000
1641
+ },
1642
+ {
1643
+ "epoch": 0.0,
1644
+ "learning_rate": 4.105597684039436e-05,
1645
+ "loss": 0.4022,
1646
+ "step": 120500
1647
+ },
1648
+ {
1649
+ "epoch": 0.01,
1650
+ "learning_rate": 4.071816951499159e-05,
1651
+ "loss": 0.3996,
1652
+ "step": 121000
1653
+ },
1654
+ {
1655
+ "epoch": 0.01,
1656
+ "learning_rate": 4.038194014319665e-05,
1657
+ "loss": 0.3976,
1658
+ "step": 121500
1659
+ },
1660
+ {
1661
+ "epoch": 0.01,
1662
+ "learning_rate": 4.00459618594495e-05,
1663
+ "loss": 0.3971,
1664
+ "step": 122000
1665
+ },
1666
+ {
1667
+ "epoch": 0.01,
1668
+ "learning_rate": 3.971093010680468e-05,
1669
+ "loss": 0.3963,
1670
+ "step": 122500
1671
+ },
1672
+ {
1673
+ "epoch": 0.01,
1674
+ "learning_rate": 3.937686555159882e-05,
1675
+ "loss": 0.3957,
1676
+ "step": 123000
1677
+ },
1678
+ {
1679
+ "epoch": 0.02,
1680
+ "learning_rate": 3.9044453954514625e-05,
1681
+ "loss": 0.395,
1682
+ "step": 123500
1683
+ },
1684
+ {
1685
+ "epoch": 0.02,
1686
+ "learning_rate": 3.871238351611711e-05,
1687
+ "loss": 0.3956,
1688
+ "step": 124000
1689
+ },
1690
+ {
1691
+ "epoch": 0.02,
1692
+ "learning_rate": 3.83820029131894e-05,
1693
+ "loss": 0.3954,
1694
+ "step": 124500
1695
+ },
1696
+ {
1697
+ "epoch": 0.03,
1698
+ "learning_rate": 3.805200836123052e-05,
1699
+ "loss": 0.394,
1700
+ "step": 125000
1701
+ },
1702
+ {
1703
+ "epoch": 0.03,
1704
+ "eval_loss": 0.3622290790081024,
1705
+ "eval_runtime": 282.4866,
1706
+ "eval_samples_per_second": 152.22,
1707
+ "eval_steps_per_second": 2.379,
1708
+ "step": 125000
1709
+ },
1710
+ {
1711
+ "epoch": 0.03,
1712
+ "learning_rate": 3.772308333681807e-05,
1713
+ "loss": 0.3947,
1714
+ "step": 125500
1715
+ },
1716
+ {
1717
+ "epoch": 0.03,
1718
+ "learning_rate": 3.739524812959698e-05,
1719
+ "loss": 0.3934,
1720
+ "step": 126000
1721
+ },
1722
+ {
1723
+ "epoch": 0.03,
1724
+ "learning_rate": 3.7068522961987034e-05,
1725
+ "loss": 0.3918,
1726
+ "step": 126500
1727
+ },
1728
+ {
1729
+ "epoch": 0.04,
1730
+ "learning_rate": 3.6742927987935615e-05,
1731
+ "loss": 0.3942,
1732
+ "step": 127000
1733
+ },
1734
+ {
1735
+ "epoch": 0.04,
1736
+ "learning_rate": 3.6418483291674376e-05,
1737
+ "loss": 0.3934,
1738
+ "step": 127500
1739
+ },
1740
+ {
1741
+ "epoch": 0.04,
1742
+ "learning_rate": 3.6095208886480486e-05,
1743
+ "loss": 0.3927,
1744
+ "step": 128000
1745
+ },
1746
+ {
1747
+ "epoch": 0.04,
1748
+ "learning_rate": 3.577312471344201e-05,
1749
+ "loss": 0.3949,
1750
+ "step": 128500
1751
+ },
1752
+ {
1753
+ "epoch": 0.04,
1754
+ "learning_rate": 3.545225064022787e-05,
1755
+ "loss": 0.3927,
1756
+ "step": 129000
1757
+ },
1758
+ {
1759
+ "epoch": 0.05,
1760
+ "learning_rate": 3.513324450767193e-05,
1761
+ "loss": 0.3933,
1762
+ "step": 129500
1763
+ },
1764
+ {
1765
+ "epoch": 0.05,
1766
+ "learning_rate": 3.481484741846708e-05,
1767
+ "loss": 0.3911,
1768
+ "step": 130000
1769
+ },
1770
+ {
1771
+ "epoch": 0.05,
1772
+ "eval_loss": 0.3602633476257324,
1773
+ "eval_runtime": 265.9858,
1774
+ "eval_samples_per_second": 161.663,
1775
+ "eval_steps_per_second": 2.526,
1776
+ "step": 130000
1777
  }
1778
  ],
1779
  "max_steps": 200000,
1780
  "num_train_epochs": 9223372036854775807,
1781
+ "total_flos": 3.061409875741901e+21,
1782
  "trial_name": null,
1783
  "trial_params": null
1784
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a93cab8b9b5c9ec8f6fac2177aa741a2ccaa1cdf10a6d60061800e462d330bbe
3
- size 5487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841f2226ec981588ac5154e03a2609704f2d84e74bad6f5a9c0f405b772a0001
3
+ size 5551
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:547e3f788d184cfbb69d289ec15d38e2ddf96173289926ade07f5513af53666d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e09bdae8806b91fbc10b97bb78e984559d5843803f34f83ae13bec03f9aedd
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a93cab8b9b5c9ec8f6fac2177aa741a2ccaa1cdf10a6d60061800e462d330bbe
3
- size 5487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841f2226ec981588ac5154e03a2609704f2d84e74bad6f5a9c0f405b772a0001
3
+ size 5551