Training in progress, step 130000

Browse files

Files changed (9) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +139 -3
last-checkpoint/training_args.bin +2 -2
pytorch_model.bin +1 -1
training_args.bin +2 -2

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f843452dd2d44fe58fdd825dcafecbd5ea028e6c09e0d8a6b3384e7f95e1f395
 size 893439185

 version https://git-lfs.github.com/spec/v1
+oid sha256:90898868bd889ea276f797a53d8b665e23905ceffbb0c399cdd9fdf53208357f
 size 893439185

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:547e3f788d184cfbb69d289ec15d38e2ddf96173289926ade07f5513af53666d
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:44e09bdae8806b91fbc10b97bb78e984559d5843803f34f83ae13bec03f9aedd
 size 449471589

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:774912b9aaf57566d96930e8649a92e3d3c64af7753e13704dd13624abab5efb
 size 15587

 version https://git-lfs.github.com/spec/v1
+oid sha256:88004fc539503ca7b97859aa1d0c5a82fcb7f351b44fb5ff1b5865391c2b3cde
 size 15587

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c30a234f84ea7205c00b70e3d45f309d75cec5f3be3db88621b9b0b68f3199c5
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c66d64016310bb2663dcec6c225164473113a433baa255e11fdf3b0cbfe9cd7
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50ed9c97014449949451103ed83d187bd2f30103cb5d95bcdb03749d65dbc585
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f5e5f4eb24c3d48634235c80df17254ed52671e30811d1041e058ad2570720f
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2,
-  "global_step": 120000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1638,11 +1638,147 @@
       "eval_samples_per_second": 147.554,
       "eval_steps_per_second": 2.306,
       "step": 120000
     }
   ],
   "max_steps": 200000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 2.825916808377139e+21,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.05,
+  "global_step": 130000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 147.554,
       "eval_steps_per_second": 2.306,
       "step": 120000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.105597684039436e-05,
+      "loss": 0.4022,
+      "step": 120500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.071816951499159e-05,
+      "loss": 0.3996,
+      "step": 121000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.038194014319665e-05,
+      "loss": 0.3976,
+      "step": 121500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.00459618594495e-05,
+      "loss": 0.3971,
+      "step": 122000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.971093010680468e-05,
+      "loss": 0.3963,
+      "step": 122500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.937686555159882e-05,
+      "loss": 0.3957,
+      "step": 123000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9044453954514625e-05,
+      "loss": 0.395,
+      "step": 123500
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.871238351611711e-05,
+      "loss": 0.3956,
+      "step": 124000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.83820029131894e-05,
+      "loss": 0.3954,
+      "step": 124500
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 3.805200836123052e-05,
+      "loss": 0.394,
+      "step": 125000
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 0.3622290790081024,
+      "eval_runtime": 282.4866,
+      "eval_samples_per_second": 152.22,
+      "eval_steps_per_second": 2.379,
+      "step": 125000
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 3.772308333681807e-05,
+      "loss": 0.3947,
+      "step": 125500
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 3.739524812959698e-05,
+      "loss": 0.3934,
+      "step": 126000
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 3.7068522961987034e-05,
+      "loss": 0.3918,
+      "step": 126500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 3.6742927987935615e-05,
+      "loss": 0.3942,
+      "step": 127000
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 3.6418483291674376e-05,
+      "loss": 0.3934,
+      "step": 127500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 3.6095208886480486e-05,
+      "loss": 0.3927,
+      "step": 128000
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 3.577312471344201e-05,
+      "loss": 0.3949,
+      "step": 128500
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 3.545225064022787e-05,
+      "loss": 0.3927,
+      "step": 129000
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 3.513324450767193e-05,
+      "loss": 0.3933,
+      "step": 129500
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 3.481484741846708e-05,
+      "loss": 0.3911,
+      "step": 130000
+    },
+    {
+      "epoch": 0.05,
+      "eval_loss": 0.3602633476257324,
+      "eval_runtime": 265.9858,
+      "eval_samples_per_second": 161.663,
+      "eval_steps_per_second": 2.526,
+      "step": 130000
     }
   ],
   "max_steps": 200000,
   "num_train_epochs": 9223372036854775807,
+  "total_flos": 3.061409875741901e+21,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a93cab8b9b5c9ec8f6fac2177aa741a2ccaa1cdf10a6d60061800e462d330bbe
-size 5487

 version https://git-lfs.github.com/spec/v1
+oid sha256:841f2226ec981588ac5154e03a2609704f2d84e74bad6f5a9c0f405b772a0001
+size 5551

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:547e3f788d184cfbb69d289ec15d38e2ddf96173289926ade07f5513af53666d
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:44e09bdae8806b91fbc10b97bb78e984559d5843803f34f83ae13bec03f9aedd
 size 449471589

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a93cab8b9b5c9ec8f6fac2177aa741a2ccaa1cdf10a6d60061800e462d330bbe
-size 5487

 version https://git-lfs.github.com/spec/v1
+oid sha256:841f2226ec981588ac5154e03a2609704f2d84e74bad6f5a9c0f405b772a0001
+size 5551