sedrickkeh commited on
Commit
569dac6
·
verified ·
1 Parent(s): e43b70f

Training in progress, epoch 4

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d8aa58f9f5c469db326a8c23c75c43566099809ac74363f962a168769459a9a
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32584ff74829ef6d25dd444b927b40d9c2f3c65c13f83eeca11759d9c0fa4bc
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5feecaccb79460c86e1674dd0b93582b9e63d79eef484b470cb2f889fe267c0
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7152740923a6f105daa1d052b5c7b1f00579a8db40370c936401475b3397614c
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11eb0bec53bfe9a5bc692bfa554b75a63f2b496789320b2b5c45a97041746a5c
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7e927606cafa755a84206b47fe7dd6afe747090d14eaf9b2ed64fdce5fb96f
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0bfbd1ce62d7bf23eaee626db92851ab885f880cc0409c73ad1398df4db2311
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c66d7324409d6d79a23be44b2a75fdd823641824fc6f1aac931d4651048cbf
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -222,3 +222,59 @@
222
  {"current_steps": 2190, "total_steps": 4392, "loss": 0.6324, "lr": 5e-06, "epoch": 3.987255348202094, "percentage": 49.86, "elapsed_time": "19:55:44", "remaining_time": "20:02:17"}
223
  {"current_steps": 2197, "total_steps": 4392, "eval_loss": 0.7583181262016296, "epoch": 4.0, "percentage": 50.02, "elapsed_time": "20:05:43", "remaining_time": "20:04:37"}
224
  {"current_steps": 2200, "total_steps": 4392, "loss": 0.6642, "lr": 5e-06, "epoch": 4.005461993627674, "percentage": 50.09, "elapsed_time": "20:08:36", "remaining_time": "20:04:12"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  {"current_steps": 2190, "total_steps": 4392, "loss": 0.6324, "lr": 5e-06, "epoch": 3.987255348202094, "percentage": 49.86, "elapsed_time": "19:55:44", "remaining_time": "20:02:17"}
223
  {"current_steps": 2197, "total_steps": 4392, "eval_loss": 0.7583181262016296, "epoch": 4.0, "percentage": 50.02, "elapsed_time": "20:05:43", "remaining_time": "20:04:37"}
224
  {"current_steps": 2200, "total_steps": 4392, "loss": 0.6642, "lr": 5e-06, "epoch": 4.005461993627674, "percentage": 50.09, "elapsed_time": "20:08:36", "remaining_time": "20:04:12"}
225
+ {"current_steps": 2210, "total_steps": 4392, "loss": 0.5747, "lr": 5e-06, "epoch": 4.023668639053255, "percentage": 50.32, "elapsed_time": "20:13:58", "remaining_time": "19:58:35"}
226
+ {"current_steps": 2220, "total_steps": 4392, "loss": 0.5739, "lr": 5e-06, "epoch": 4.041875284478834, "percentage": 50.55, "elapsed_time": "20:19:19", "remaining_time": "19:52:57"}
227
+ {"current_steps": 2230, "total_steps": 4392, "loss": 0.5717, "lr": 5e-06, "epoch": 4.060081929904415, "percentage": 50.77, "elapsed_time": "20:24:40", "remaining_time": "19:47:20"}
228
+ {"current_steps": 2240, "total_steps": 4392, "loss": 0.577, "lr": 5e-06, "epoch": 4.078288575329996, "percentage": 51.0, "elapsed_time": "20:30:01", "remaining_time": "19:41:42"}
229
+ {"current_steps": 2250, "total_steps": 4392, "loss": 0.5772, "lr": 5e-06, "epoch": 4.096495220755576, "percentage": 51.23, "elapsed_time": "20:35:23", "remaining_time": "19:36:05"}
230
+ {"current_steps": 2260, "total_steps": 4392, "loss": 0.5787, "lr": 5e-06, "epoch": 4.114701866181156, "percentage": 51.46, "elapsed_time": "20:40:45", "remaining_time": "19:30:29"}
231
+ {"current_steps": 2270, "total_steps": 4392, "loss": 0.5782, "lr": 5e-06, "epoch": 4.132908511606736, "percentage": 51.68, "elapsed_time": "20:46:06", "remaining_time": "19:24:52"}
232
+ {"current_steps": 2280, "total_steps": 4392, "loss": 0.5794, "lr": 5e-06, "epoch": 4.151115157032317, "percentage": 51.91, "elapsed_time": "20:51:28", "remaining_time": "19:19:16"}
233
+ {"current_steps": 2290, "total_steps": 4392, "loss": 0.5806, "lr": 5e-06, "epoch": 4.1693218024578975, "percentage": 52.14, "elapsed_time": "20:56:50", "remaining_time": "19:13:39"}
234
+ {"current_steps": 2300, "total_steps": 4392, "loss": 0.5797, "lr": 5e-06, "epoch": 4.187528447883477, "percentage": 52.37, "elapsed_time": "21:02:12", "remaining_time": "19:08:03"}
235
+ {"current_steps": 2310, "total_steps": 4392, "loss": 0.5767, "lr": 5e-06, "epoch": 4.205735093309058, "percentage": 52.6, "elapsed_time": "21:07:33", "remaining_time": "19:02:27"}
236
+ {"current_steps": 2320, "total_steps": 4392, "loss": 0.5778, "lr": 5e-06, "epoch": 4.223941738734638, "percentage": 52.82, "elapsed_time": "21:12:53", "remaining_time": "18:56:49"}
237
+ {"current_steps": 2330, "total_steps": 4392, "loss": 0.5786, "lr": 5e-06, "epoch": 4.2421483841602186, "percentage": 53.05, "elapsed_time": "21:18:13", "remaining_time": "18:51:11"}
238
+ {"current_steps": 2340, "total_steps": 4392, "loss": 0.5824, "lr": 5e-06, "epoch": 4.260355029585799, "percentage": 53.28, "elapsed_time": "21:23:35", "remaining_time": "18:45:36"}
239
+ {"current_steps": 2350, "total_steps": 4392, "loss": 0.578, "lr": 5e-06, "epoch": 4.278561675011379, "percentage": 53.51, "elapsed_time": "21:28:56", "remaining_time": "18:40:00"}
240
+ {"current_steps": 2360, "total_steps": 4392, "loss": 0.5767, "lr": 5e-06, "epoch": 4.29676832043696, "percentage": 53.73, "elapsed_time": "21:34:17", "remaining_time": "18:34:24"}
241
+ {"current_steps": 2370, "total_steps": 4392, "loss": 0.5828, "lr": 5e-06, "epoch": 4.31497496586254, "percentage": 53.96, "elapsed_time": "21:39:38", "remaining_time": "18:28:48"}
242
+ {"current_steps": 2380, "total_steps": 4392, "loss": 0.5845, "lr": 5e-06, "epoch": 4.33318161128812, "percentage": 54.19, "elapsed_time": "21:44:59", "remaining_time": "18:23:12"}
243
+ {"current_steps": 2390, "total_steps": 4392, "loss": 0.5831, "lr": 5e-06, "epoch": 4.3513882567137, "percentage": 54.42, "elapsed_time": "21:50:21", "remaining_time": "18:17:37"}
244
+ {"current_steps": 2400, "total_steps": 4392, "loss": 0.5838, "lr": 5e-06, "epoch": 4.369594902139281, "percentage": 54.64, "elapsed_time": "21:55:42", "remaining_time": "18:12:02"}
245
+ {"current_steps": 2410, "total_steps": 4392, "loss": 0.5838, "lr": 5e-06, "epoch": 4.3878015475648615, "percentage": 54.87, "elapsed_time": "22:01:00", "remaining_time": "18:06:24"}
246
+ {"current_steps": 2420, "total_steps": 4392, "loss": 0.5873, "lr": 5e-06, "epoch": 4.406008192990441, "percentage": 55.1, "elapsed_time": "22:06:20", "remaining_time": "18:00:48"}
247
+ {"current_steps": 2430, "total_steps": 4392, "loss": 0.5862, "lr": 5e-06, "epoch": 4.424214838416022, "percentage": 55.33, "elapsed_time": "22:11:41", "remaining_time": "17:55:13"}
248
+ {"current_steps": 2440, "total_steps": 4392, "loss": 0.5825, "lr": 5e-06, "epoch": 4.442421483841602, "percentage": 55.56, "elapsed_time": "22:17:03", "remaining_time": "17:49:38"}
249
+ {"current_steps": 2450, "total_steps": 4392, "loss": 0.5854, "lr": 5e-06, "epoch": 4.4606281292671826, "percentage": 55.78, "elapsed_time": "22:22:23", "remaining_time": "17:44:02"}
250
+ {"current_steps": 2460, "total_steps": 4392, "loss": 0.5866, "lr": 5e-06, "epoch": 4.478834774692763, "percentage": 56.01, "elapsed_time": "22:27:41", "remaining_time": "17:38:26"}
251
+ {"current_steps": 2470, "total_steps": 4392, "loss": 0.5869, "lr": 5e-06, "epoch": 4.497041420118343, "percentage": 56.24, "elapsed_time": "22:33:01", "remaining_time": "17:32:50"}
252
+ {"current_steps": 2480, "total_steps": 4392, "loss": 0.5873, "lr": 5e-06, "epoch": 4.515248065543924, "percentage": 56.47, "elapsed_time": "22:38:22", "remaining_time": "17:27:16"}
253
+ {"current_steps": 2490, "total_steps": 4392, "loss": 0.5832, "lr": 5e-06, "epoch": 4.533454710969504, "percentage": 56.69, "elapsed_time": "22:43:44", "remaining_time": "17:21:42"}
254
+ {"current_steps": 2500, "total_steps": 4392, "loss": 0.5878, "lr": 5e-06, "epoch": 4.551661356395084, "percentage": 56.92, "elapsed_time": "22:49:06", "remaining_time": "17:16:08"}
255
+ {"current_steps": 2510, "total_steps": 4392, "loss": 0.5844, "lr": 5e-06, "epoch": 4.569868001820664, "percentage": 57.15, "elapsed_time": "22:54:28", "remaining_time": "17:10:34"}
256
+ {"current_steps": 2520, "total_steps": 4392, "loss": 0.5883, "lr": 5e-06, "epoch": 4.588074647246245, "percentage": 57.38, "elapsed_time": "22:59:51", "remaining_time": "17:05:01"}
257
+ {"current_steps": 2530, "total_steps": 4392, "loss": 0.5914, "lr": 5e-06, "epoch": 4.6062812926718255, "percentage": 57.6, "elapsed_time": "23:05:11", "remaining_time": "16:59:27"}
258
+ {"current_steps": 2540, "total_steps": 4392, "loss": 0.5882, "lr": 5e-06, "epoch": 4.624487938097405, "percentage": 57.83, "elapsed_time": "23:10:32", "remaining_time": "16:53:53"}
259
+ {"current_steps": 2550, "total_steps": 4392, "loss": 0.5865, "lr": 5e-06, "epoch": 4.642694583522986, "percentage": 58.06, "elapsed_time": "23:15:55", "remaining_time": "16:48:20"}
260
+ {"current_steps": 2560, "total_steps": 4392, "loss": 0.5885, "lr": 5e-06, "epoch": 4.660901228948566, "percentage": 58.29, "elapsed_time": "23:21:16", "remaining_time": "16:42:47"}
261
+ {"current_steps": 2570, "total_steps": 4392, "loss": 0.585, "lr": 5e-06, "epoch": 4.6791078743741465, "percentage": 58.52, "elapsed_time": "23:26:37", "remaining_time": "16:37:13"}
262
+ {"current_steps": 2580, "total_steps": 4392, "loss": 0.5874, "lr": 5e-06, "epoch": 4.697314519799727, "percentage": 58.74, "elapsed_time": "23:31:59", "remaining_time": "16:31:40"}
263
+ {"current_steps": 2590, "total_steps": 4392, "loss": 0.5886, "lr": 5e-06, "epoch": 4.715521165225307, "percentage": 58.97, "elapsed_time": "23:37:21", "remaining_time": "16:26:08"}
264
+ {"current_steps": 2600, "total_steps": 4392, "loss": 0.5931, "lr": 5e-06, "epoch": 4.733727810650888, "percentage": 59.2, "elapsed_time": "23:42:44", "remaining_time": "16:20:35"}
265
+ {"current_steps": 2610, "total_steps": 4392, "loss": 0.5877, "lr": 5e-06, "epoch": 4.751934456076468, "percentage": 59.43, "elapsed_time": "23:48:06", "remaining_time": "16:15:03"}
266
+ {"current_steps": 2620, "total_steps": 4392, "loss": 0.5901, "lr": 5e-06, "epoch": 4.770141101502048, "percentage": 59.65, "elapsed_time": "23:53:29", "remaining_time": "16:09:31"}
267
+ {"current_steps": 2630, "total_steps": 4392, "loss": 0.5877, "lr": 5e-06, "epoch": 4.788347746927629, "percentage": 59.88, "elapsed_time": "23:58:51", "remaining_time": "16:03:59"}
268
+ {"current_steps": 2640, "total_steps": 4392, "loss": 0.593, "lr": 5e-06, "epoch": 4.806554392353209, "percentage": 60.11, "elapsed_time": "1 day, 0:04:14", "remaining_time": "15:58:26"}
269
+ {"current_steps": 2650, "total_steps": 4392, "loss": 0.592, "lr": 5e-06, "epoch": 4.8247610377787895, "percentage": 60.34, "elapsed_time": "1 day, 0:09:34", "remaining_time": "15:52:53"}
270
+ {"current_steps": 2660, "total_steps": 4392, "loss": 0.5923, "lr": 5e-06, "epoch": 4.842967683204369, "percentage": 60.56, "elapsed_time": "1 day, 0:14:56", "remaining_time": "15:47:20"}
271
+ {"current_steps": 2670, "total_steps": 4392, "loss": 0.5906, "lr": 5e-06, "epoch": 4.86117432862995, "percentage": 60.79, "elapsed_time": "1 day, 0:20:18", "remaining_time": "15:41:48"}
272
+ {"current_steps": 2680, "total_steps": 4392, "loss": 0.5943, "lr": 5e-06, "epoch": 4.879380974055531, "percentage": 61.02, "elapsed_time": "1 day, 0:25:40", "remaining_time": "15:36:16"}
273
+ {"current_steps": 2690, "total_steps": 4392, "loss": 0.5909, "lr": 5e-06, "epoch": 4.8975876194811105, "percentage": 61.25, "elapsed_time": "1 day, 0:31:03", "remaining_time": "15:30:45"}
274
+ {"current_steps": 2700, "total_steps": 4392, "loss": 0.5938, "lr": 5e-06, "epoch": 4.915794264906691, "percentage": 61.48, "elapsed_time": "1 day, 0:36:24", "remaining_time": "15:25:12"}
275
+ {"current_steps": 2710, "total_steps": 4392, "loss": 0.5896, "lr": 5e-06, "epoch": 4.934000910332271, "percentage": 61.7, "elapsed_time": "1 day, 0:41:43", "remaining_time": "15:19:39"}
276
+ {"current_steps": 2720, "total_steps": 4392, "loss": 0.5925, "lr": 5e-06, "epoch": 4.952207555757852, "percentage": 61.93, "elapsed_time": "1 day, 0:47:04", "remaining_time": "15:14:06"}
277
+ {"current_steps": 2730, "total_steps": 4392, "loss": 0.587, "lr": 5e-06, "epoch": 4.970414201183432, "percentage": 62.16, "elapsed_time": "1 day, 0:52:26", "remaining_time": "15:08:34"}
278
+ {"current_steps": 2740, "total_steps": 4392, "loss": 0.5947, "lr": 5e-06, "epoch": 4.988620846609012, "percentage": 62.39, "elapsed_time": "1 day, 0:57:47", "remaining_time": "15:03:03"}
279
+ {"current_steps": 2746, "total_steps": 4392, "eval_loss": 0.7756755352020264, "epoch": 4.999544833864361, "percentage": 62.52, "elapsed_time": "1 day, 1:07:27", "remaining_time": "15:03:35"}
280
+ {"current_steps": 2750, "total_steps": 4392, "loss": 0.6175, "lr": 5e-06, "epoch": 5.006827492034593, "percentage": 62.61, "elapsed_time": "1 day, 1:10:47", "remaining_time": "15:02:04"}