Nadav commited on
Commit
732e9fe
1 Parent(s): b012e7f

Training in progress, step 170000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf240eed9e8208c7286897280d42b252096e23bca8816bf42dd7a9b15200c4e7
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3edb0adadd334561a36e43a4212a4e9514b0b74e249ccb77a9f1a398e42f35
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ebdee98f56d146a65b33bc3c6a0ae4cbe41eedc20fc4c696b591b455b690b3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f22bc3eca0ebb0e03246c1e1c410976b12328cb4244ff48da993896225bf4a8
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88004fc539503ca7b97859aa1d0c5a82fcb7f351b44fb5ff1b5865391c2b3cde
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c26ad211585233ad2dccb9325976cb7e71ced6080ea69e9468119115e0c0050
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aa578981512791e6849bdb4bf6d0c6a452082d1d9f87c0e1fc7d58cb03cb03e
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ae036049be18d3db0b069d7cdee2136b0e955af29d02ba238e8d63ec3de1819
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5d370370c39c42f0150b3734d1138af727c24530d2b404ccc308de87353353
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f97d2f076eca9f1526fcbf8cced9c57bd3414f3b9c7f6f76f9126f17c819f1a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05,
5
- "global_step": 160000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2182,11 +2182,147 @@
2182
  "eval_samples_per_second": 144.224,
2183
  "eval_steps_per_second": 2.254,
2184
  "step": 160000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2185
  }
2186
  ],
2187
  "max_steps": 200000,
2188
  "num_train_epochs": 9223372036854775807,
2189
- "total_flos": 3.7678890778361856e+21,
2190
  "trial_name": null,
2191
  "trial_params": null
2192
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1,
5
+ "global_step": 170000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2182
  "eval_samples_per_second": 144.224,
2183
  "eval_steps_per_second": 2.254,
2184
  "step": 160000
2185
+ },
2186
+ {
2187
+ "epoch": 0.05,
2188
+ "learning_rate": 1.848573312159415e-05,
2189
+ "loss": 0.378,
2190
+ "step": 160500
2191
+ },
2192
+ {
2193
+ "epoch": 0.06,
2194
+ "learning_rate": 1.8279734067715378e-05,
2195
+ "loss": 0.3769,
2196
+ "step": 161000
2197
+ },
2198
+ {
2199
+ "epoch": 0.06,
2200
+ "learning_rate": 1.8075609083923823e-05,
2201
+ "loss": 0.3772,
2202
+ "step": 161500
2203
+ },
2204
+ {
2205
+ "epoch": 0.06,
2206
+ "learning_rate": 1.7873785866083376e-05,
2207
+ "loss": 0.3761,
2208
+ "step": 162000
2209
+ },
2210
+ {
2211
+ "epoch": 0.06,
2212
+ "learning_rate": 1.7674276863600826e-05,
2213
+ "loss": 0.3754,
2214
+ "step": 162500
2215
+ },
2216
+ {
2217
+ "epoch": 0.07,
2218
+ "learning_rate": 1.747709438313117e-05,
2219
+ "loss": 0.3761,
2220
+ "step": 163000
2221
+ },
2222
+ {
2223
+ "epoch": 0.07,
2224
+ "learning_rate": 1.728225058781864e-05,
2225
+ "loss": 0.3757,
2226
+ "step": 163500
2227
+ },
2228
+ {
2229
+ "epoch": 0.07,
2230
+ "learning_rate": 1.7089757496546325e-05,
2231
+ "loss": 0.3764,
2232
+ "step": 164000
2233
+ },
2234
+ {
2235
+ "epoch": 0.07,
2236
+ "learning_rate": 1.6899626983194823e-05,
2237
+ "loss": 0.3746,
2238
+ "step": 164500
2239
+ },
2240
+ {
2241
+ "epoch": 0.07,
2242
+ "learning_rate": 1.6712243911044467e-05,
2243
+ "loss": 0.375,
2244
+ "step": 165000
2245
+ },
2246
+ {
2247
+ "epoch": 0.07,
2248
+ "eval_loss": 0.34934455156326294,
2249
+ "eval_runtime": 286.3366,
2250
+ "eval_samples_per_second": 150.173,
2251
+ "eval_steps_per_second": 2.347,
2252
+ "step": 165000
2253
+ },
2254
+ {
2255
+ "epoch": 0.08,
2256
+ "learning_rate": 1.6526868808276845e-05,
2257
+ "loss": 0.3763,
2258
+ "step": 165500
2259
+ },
2260
+ {
2261
+ "epoch": 0.08,
2262
+ "learning_rate": 1.6344254560630497e-05,
2263
+ "loss": 0.3733,
2264
+ "step": 166000
2265
+ },
2266
+ {
2267
+ "epoch": 0.08,
2268
+ "learning_rate": 1.616368051555008e-05,
2269
+ "loss": 0.3749,
2270
+ "step": 166500
2271
+ },
2272
+ {
2273
+ "epoch": 0.09,
2274
+ "learning_rate": 1.5985526173180873e-05,
2275
+ "loss": 0.3755,
2276
+ "step": 167000
2277
+ },
2278
+ {
2279
+ "epoch": 0.09,
2280
+ "learning_rate": 1.580980252292188e-05,
2281
+ "loss": 0.3754,
2282
+ "step": 167500
2283
+ },
2284
+ {
2285
+ "epoch": 0.09,
2286
+ "learning_rate": 1.563652040423561e-05,
2287
+ "loss": 0.3747,
2288
+ "step": 168000
2289
+ },
2290
+ {
2291
+ "epoch": 0.09,
2292
+ "learning_rate": 1.546569050597935e-05,
2293
+ "loss": 0.3758,
2294
+ "step": 168500
2295
+ },
2296
+ {
2297
+ "epoch": 0.1,
2298
+ "learning_rate": 1.5297657635265495e-05,
2299
+ "loss": 0.3756,
2300
+ "step": 169000
2301
+ },
2302
+ {
2303
+ "epoch": 0.1,
2304
+ "learning_rate": 1.5131758682182151e-05,
2305
+ "loss": 0.375,
2306
+ "step": 169500
2307
+ },
2308
+ {
2309
+ "epoch": 0.1,
2310
+ "learning_rate": 1.49683430856095e-05,
2311
+ "loss": 0.3759,
2312
+ "step": 170000
2313
+ },
2314
+ {
2315
+ "epoch": 0.1,
2316
+ "eval_loss": 0.3487904369831085,
2317
+ "eval_runtime": 298.7435,
2318
+ "eval_samples_per_second": 143.936,
2319
+ "eval_steps_per_second": 2.249,
2320
+ "step": 170000
2321
  }
2322
  ],
2323
  "max_steps": 200000,
2324
  "num_train_epochs": 9223372036854775807,
2325
+ "total_flos": 4.003382145200947e+21,
2326
  "trial_name": null,
2327
  "trial_params": null
2328
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ebdee98f56d146a65b33bc3c6a0ae4cbe41eedc20fc4c696b591b455b690b3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f22bc3eca0ebb0e03246c1e1c410976b12328cb4244ff48da993896225bf4a8
3
  size 449471589