ahmedheakl commited on
Commit
b552e2f
·
verified ·
1 Parent(s): 7f40bf6

Training in progress, step 1000

Browse files
added_tokens.json CHANGED
@@ -21,53 +21,53 @@
21
  "<|vision_end|>": 151653,
22
  "<|vision_pad|>": 151654,
23
  "<|vision_start|>": 151652,
24
- "QWORD": 151686,
25
- "beq": 151705,
26
- "bge": 151703,
27
- "bgt": 151712,
28
- "blt": 151681,
29
- "blx": 151713,
30
- "bne": 151704,
31
- "c0": 151711,
32
- "c1": 151690,
33
- "c2": 151680,
34
- "c3": 151693,
35
- "c4": 151708,
36
- "c5": 151695,
37
- "c6": 151672,
38
- "jg": 151676,
39
- "jge": 151675,
40
- "jle": 151709,
41
- "jne": 151702,
42
- "r0": 151682,
43
- "r1": 151677,
44
- "r10": 151691,
45
- "r10d": 151668,
46
- "r11": 151685,
47
- "r11d": 151673,
48
- "r12": 151684,
49
- "r12d": 151669,
50
- "r13": 151701,
51
- "r13d": 151710,
52
- "r14": 151700,
53
- "r14d": 151688,
54
- "r15": 151694,
55
- "r15d": 151665,
56
- "r2": 151678,
57
  "r3": 151671,
58
- "r4": 151706,
59
- "r5": 151697,
60
- "r6": 151679,
61
- "r7": 151670,
62
- "r8": 151683,
63
- "r8d": 151696,
64
- "r9": 151698,
65
- "r9d": 151707,
66
  "rbp": 151674,
67
- "rbx": 151692,
68
- "rcx": 151687,
69
- "rdi": 151689,
70
  "rdx": 151699,
71
- "rsi": 151667,
72
- "swi": 151666
73
  }
 
21
  "<|vision_end|>": 151653,
22
  "<|vision_pad|>": 151654,
23
  "<|vision_start|>": 151652,
24
+ "QWORD": 151677,
25
+ "beq": 151680,
26
+ "bge": 151711,
27
+ "bgt": 151696,
28
+ "blt": 151683,
29
+ "blx": 151685,
30
+ "bne": 151705,
31
+ "c0": 151691,
32
+ "c1": 151702,
33
+ "c2": 151688,
34
+ "c3": 151697,
35
+ "c4": 151694,
36
+ "c5": 151673,
37
+ "c6": 151707,
38
+ "jg": 151703,
39
+ "jge": 151681,
40
+ "jle": 151700,
41
+ "jne": 151665,
42
+ "r0": 151675,
43
+ "r1": 151709,
44
+ "r10": 151713,
45
+ "r10d": 151687,
46
+ "r11": 151686,
47
+ "r11d": 151666,
48
+ "r12": 151672,
49
+ "r12d": 151690,
50
+ "r13": 151698,
51
+ "r13d": 151679,
52
+ "r14": 151668,
53
+ "r14d": 151706,
54
+ "r15": 151689,
55
+ "r15d": 151701,
56
+ "r2": 151710,
57
  "r3": 151671,
58
+ "r4": 151693,
59
+ "r5": 151695,
60
+ "r6": 151667,
61
+ "r7": 151676,
62
+ "r8": 151678,
63
+ "r8d": 151669,
64
+ "r9": 151704,
65
+ "r9d": 151670,
66
  "rbp": 151674,
67
+ "rbx": 151712,
68
+ "rcx": 151684,
69
+ "rdi": 151682,
70
  "rdx": 151699,
71
+ "rsi": 151708,
72
+ "swi": 151692
73
  }
config.json CHANGED
@@ -18,11 +18,12 @@
18
  "num_key_value_heads": 2,
19
  "pretraining_tp": 1,
20
  "rms_norm_eps": 1e-06,
 
21
  "rope_theta": 1000000.0,
22
  "sliding_window": null,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.44.2",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151714
 
18
  "num_key_value_heads": 2,
19
  "pretraining_tp": 1,
20
  "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
  "rope_theta": 1000000.0,
23
  "sliding_window": null,
24
  "tie_word_embeddings": true,
25
  "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.46.0",
27
  "use_cache": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 151714
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945e7cbc5a46e5d149fe2b32bab1840a99cec0b9e35cafaafc55906691de6222
3
  size 987700000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5f2e28f6ec0fb3c2bd540daa7406b7806a886654561dc7e73f54e7ab5b43ae1
3
  size 987700000
runs/Nov25_06-57-22_gpu-54/events.out.tfevents.1732503480.gpu-54 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee77b815f21eb80767827073446926a92305c4cb7d67a95ea8261b5ead9954c
3
+ size 5463
runs/Nov25_07-01-26_gpu-54/events.out.tfevents.1732503722.gpu-54 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d762d52e5e40e65df9e612e8e67f35a7f159d52c3beb554910106fe28c0a2bc
3
+ size 13672
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d466bb19d7edfe94816e605063fe44dabedd25bb286af154d995c3360c116e26
3
- size 7040582
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db98b2e71bb0c5755ac564d537ab438f4da35002ccc08c914520275d2aefbaa8
3
+ size 11430706
tokenizer_config.json CHANGED
@@ -179,7 +179,7 @@
179
  "special": false
180
  },
181
  "151665": {
182
- "content": "r15d",
183
  "lstrip": false,
184
  "normalized": true,
185
  "rstrip": false,
@@ -187,7 +187,7 @@
187
  "special": false
188
  },
189
  "151666": {
190
- "content": "swi",
191
  "lstrip": false,
192
  "normalized": true,
193
  "rstrip": false,
@@ -195,7 +195,7 @@
195
  "special": false
196
  },
197
  "151667": {
198
- "content": "rsi",
199
  "lstrip": false,
200
  "normalized": true,
201
  "rstrip": false,
@@ -203,7 +203,7 @@
203
  "special": false
204
  },
205
  "151668": {
206
- "content": "r10d",
207
  "lstrip": false,
208
  "normalized": true,
209
  "rstrip": false,
@@ -211,7 +211,7 @@
211
  "special": false
212
  },
213
  "151669": {
214
- "content": "r12d",
215
  "lstrip": false,
216
  "normalized": true,
217
  "rstrip": false,
@@ -219,7 +219,7 @@
219
  "special": false
220
  },
221
  "151670": {
222
- "content": "r7",
223
  "lstrip": false,
224
  "normalized": true,
225
  "rstrip": false,
@@ -235,7 +235,7 @@
235
  "special": false
236
  },
237
  "151672": {
238
- "content": "c6",
239
  "lstrip": false,
240
  "normalized": true,
241
  "rstrip": false,
@@ -243,7 +243,7 @@
243
  "special": false
244
  },
245
  "151673": {
246
- "content": "r11d",
247
  "lstrip": false,
248
  "normalized": true,
249
  "rstrip": false,
@@ -259,7 +259,7 @@
259
  "special": false
260
  },
261
  "151675": {
262
- "content": "jge",
263
  "lstrip": false,
264
  "normalized": true,
265
  "rstrip": false,
@@ -267,7 +267,7 @@
267
  "special": false
268
  },
269
  "151676": {
270
- "content": "jg",
271
  "lstrip": false,
272
  "normalized": true,
273
  "rstrip": false,
@@ -275,7 +275,7 @@
275
  "special": false
276
  },
277
  "151677": {
278
- "content": "r1",
279
  "lstrip": false,
280
  "normalized": true,
281
  "rstrip": false,
@@ -283,7 +283,7 @@
283
  "special": false
284
  },
285
  "151678": {
286
- "content": "r2",
287
  "lstrip": false,
288
  "normalized": true,
289
  "rstrip": false,
@@ -291,7 +291,7 @@
291
  "special": false
292
  },
293
  "151679": {
294
- "content": "r6",
295
  "lstrip": false,
296
  "normalized": true,
297
  "rstrip": false,
@@ -299,7 +299,7 @@
299
  "special": false
300
  },
301
  "151680": {
302
- "content": "c2",
303
  "lstrip": false,
304
  "normalized": true,
305
  "rstrip": false,
@@ -307,7 +307,7 @@
307
  "special": false
308
  },
309
  "151681": {
310
- "content": "blt",
311
  "lstrip": false,
312
  "normalized": true,
313
  "rstrip": false,
@@ -315,7 +315,7 @@
315
  "special": false
316
  },
317
  "151682": {
318
- "content": "r0",
319
  "lstrip": false,
320
  "normalized": true,
321
  "rstrip": false,
@@ -323,7 +323,7 @@
323
  "special": false
324
  },
325
  "151683": {
326
- "content": "r8",
327
  "lstrip": false,
328
  "normalized": true,
329
  "rstrip": false,
@@ -331,7 +331,7 @@
331
  "special": false
332
  },
333
  "151684": {
334
- "content": "r12",
335
  "lstrip": false,
336
  "normalized": true,
337
  "rstrip": false,
@@ -339,7 +339,7 @@
339
  "special": false
340
  },
341
  "151685": {
342
- "content": "r11",
343
  "lstrip": false,
344
  "normalized": true,
345
  "rstrip": false,
@@ -347,7 +347,7 @@
347
  "special": false
348
  },
349
  "151686": {
350
- "content": "QWORD",
351
  "lstrip": false,
352
  "normalized": true,
353
  "rstrip": false,
@@ -355,7 +355,7 @@
355
  "special": false
356
  },
357
  "151687": {
358
- "content": "rcx",
359
  "lstrip": false,
360
  "normalized": true,
361
  "rstrip": false,
@@ -363,7 +363,7 @@
363
  "special": false
364
  },
365
  "151688": {
366
- "content": "r14d",
367
  "lstrip": false,
368
  "normalized": true,
369
  "rstrip": false,
@@ -371,7 +371,7 @@
371
  "special": false
372
  },
373
  "151689": {
374
- "content": "rdi",
375
  "lstrip": false,
376
  "normalized": true,
377
  "rstrip": false,
@@ -379,7 +379,7 @@
379
  "special": false
380
  },
381
  "151690": {
382
- "content": "c1",
383
  "lstrip": false,
384
  "normalized": true,
385
  "rstrip": false,
@@ -387,7 +387,7 @@
387
  "special": false
388
  },
389
  "151691": {
390
- "content": "r10",
391
  "lstrip": false,
392
  "normalized": true,
393
  "rstrip": false,
@@ -395,7 +395,7 @@
395
  "special": false
396
  },
397
  "151692": {
398
- "content": "rbx",
399
  "lstrip": false,
400
  "normalized": true,
401
  "rstrip": false,
@@ -403,7 +403,7 @@
403
  "special": false
404
  },
405
  "151693": {
406
- "content": "c3",
407
  "lstrip": false,
408
  "normalized": true,
409
  "rstrip": false,
@@ -411,7 +411,7 @@
411
  "special": false
412
  },
413
  "151694": {
414
- "content": "r15",
415
  "lstrip": false,
416
  "normalized": true,
417
  "rstrip": false,
@@ -419,7 +419,7 @@
419
  "special": false
420
  },
421
  "151695": {
422
- "content": "c5",
423
  "lstrip": false,
424
  "normalized": true,
425
  "rstrip": false,
@@ -427,7 +427,7 @@
427
  "special": false
428
  },
429
  "151696": {
430
- "content": "r8d",
431
  "lstrip": false,
432
  "normalized": true,
433
  "rstrip": false,
@@ -435,7 +435,7 @@
435
  "special": false
436
  },
437
  "151697": {
438
- "content": "r5",
439
  "lstrip": false,
440
  "normalized": true,
441
  "rstrip": false,
@@ -443,7 +443,7 @@
443
  "special": false
444
  },
445
  "151698": {
446
- "content": "r9",
447
  "lstrip": false,
448
  "normalized": true,
449
  "rstrip": false,
@@ -459,7 +459,7 @@
459
  "special": false
460
  },
461
  "151700": {
462
- "content": "r14",
463
  "lstrip": false,
464
  "normalized": true,
465
  "rstrip": false,
@@ -467,7 +467,7 @@
467
  "special": false
468
  },
469
  "151701": {
470
- "content": "r13",
471
  "lstrip": false,
472
  "normalized": true,
473
  "rstrip": false,
@@ -475,7 +475,7 @@
475
  "special": false
476
  },
477
  "151702": {
478
- "content": "jne",
479
  "lstrip": false,
480
  "normalized": true,
481
  "rstrip": false,
@@ -483,7 +483,7 @@
483
  "special": false
484
  },
485
  "151703": {
486
- "content": "bge",
487
  "lstrip": false,
488
  "normalized": true,
489
  "rstrip": false,
@@ -491,7 +491,7 @@
491
  "special": false
492
  },
493
  "151704": {
494
- "content": "bne",
495
  "lstrip": false,
496
  "normalized": true,
497
  "rstrip": false,
@@ -499,7 +499,7 @@
499
  "special": false
500
  },
501
  "151705": {
502
- "content": "beq",
503
  "lstrip": false,
504
  "normalized": true,
505
  "rstrip": false,
@@ -507,7 +507,7 @@
507
  "special": false
508
  },
509
  "151706": {
510
- "content": "r4",
511
  "lstrip": false,
512
  "normalized": true,
513
  "rstrip": false,
@@ -515,7 +515,7 @@
515
  "special": false
516
  },
517
  "151707": {
518
- "content": "r9d",
519
  "lstrip": false,
520
  "normalized": true,
521
  "rstrip": false,
@@ -523,7 +523,7 @@
523
  "special": false
524
  },
525
  "151708": {
526
- "content": "c4",
527
  "lstrip": false,
528
  "normalized": true,
529
  "rstrip": false,
@@ -531,7 +531,7 @@
531
  "special": false
532
  },
533
  "151709": {
534
- "content": "jle",
535
  "lstrip": false,
536
  "normalized": true,
537
  "rstrip": false,
@@ -539,7 +539,7 @@
539
  "special": false
540
  },
541
  "151710": {
542
- "content": "r13d",
543
  "lstrip": false,
544
  "normalized": true,
545
  "rstrip": false,
@@ -547,7 +547,7 @@
547
  "special": false
548
  },
549
  "151711": {
550
- "content": "c0",
551
  "lstrip": false,
552
  "normalized": true,
553
  "rstrip": false,
@@ -555,7 +555,7 @@
555
  "special": false
556
  },
557
  "151712": {
558
- "content": "bgt",
559
  "lstrip": false,
560
  "normalized": true,
561
  "rstrip": false,
@@ -563,7 +563,7 @@
563
  "special": false
564
  },
565
  "151713": {
566
- "content": "blx",
567
  "lstrip": false,
568
  "normalized": true,
569
  "rstrip": false,
 
179
  "special": false
180
  },
181
  "151665": {
182
+ "content": "jne",
183
  "lstrip": false,
184
  "normalized": true,
185
  "rstrip": false,
 
187
  "special": false
188
  },
189
  "151666": {
190
+ "content": "r11d",
191
  "lstrip": false,
192
  "normalized": true,
193
  "rstrip": false,
 
195
  "special": false
196
  },
197
  "151667": {
198
+ "content": "r6",
199
  "lstrip": false,
200
  "normalized": true,
201
  "rstrip": false,
 
203
  "special": false
204
  },
205
  "151668": {
206
+ "content": "r14",
207
  "lstrip": false,
208
  "normalized": true,
209
  "rstrip": false,
 
211
  "special": false
212
  },
213
  "151669": {
214
+ "content": "r8d",
215
  "lstrip": false,
216
  "normalized": true,
217
  "rstrip": false,
 
219
  "special": false
220
  },
221
  "151670": {
222
+ "content": "r9d",
223
  "lstrip": false,
224
  "normalized": true,
225
  "rstrip": false,
 
235
  "special": false
236
  },
237
  "151672": {
238
+ "content": "r12",
239
  "lstrip": false,
240
  "normalized": true,
241
  "rstrip": false,
 
243
  "special": false
244
  },
245
  "151673": {
246
+ "content": "c5",
247
  "lstrip": false,
248
  "normalized": true,
249
  "rstrip": false,
 
259
  "special": false
260
  },
261
  "151675": {
262
+ "content": "r0",
263
  "lstrip": false,
264
  "normalized": true,
265
  "rstrip": false,
 
267
  "special": false
268
  },
269
  "151676": {
270
+ "content": "r7",
271
  "lstrip": false,
272
  "normalized": true,
273
  "rstrip": false,
 
275
  "special": false
276
  },
277
  "151677": {
278
+ "content": "QWORD",
279
  "lstrip": false,
280
  "normalized": true,
281
  "rstrip": false,
 
283
  "special": false
284
  },
285
  "151678": {
286
+ "content": "r8",
287
  "lstrip": false,
288
  "normalized": true,
289
  "rstrip": false,
 
291
  "special": false
292
  },
293
  "151679": {
294
+ "content": "r13d",
295
  "lstrip": false,
296
  "normalized": true,
297
  "rstrip": false,
 
299
  "special": false
300
  },
301
  "151680": {
302
+ "content": "beq",
303
  "lstrip": false,
304
  "normalized": true,
305
  "rstrip": false,
 
307
  "special": false
308
  },
309
  "151681": {
310
+ "content": "jge",
311
  "lstrip": false,
312
  "normalized": true,
313
  "rstrip": false,
 
315
  "special": false
316
  },
317
  "151682": {
318
+ "content": "rdi",
319
  "lstrip": false,
320
  "normalized": true,
321
  "rstrip": false,
 
323
  "special": false
324
  },
325
  "151683": {
326
+ "content": "blt",
327
  "lstrip": false,
328
  "normalized": true,
329
  "rstrip": false,
 
331
  "special": false
332
  },
333
  "151684": {
334
+ "content": "rcx",
335
  "lstrip": false,
336
  "normalized": true,
337
  "rstrip": false,
 
339
  "special": false
340
  },
341
  "151685": {
342
+ "content": "blx",
343
  "lstrip": false,
344
  "normalized": true,
345
  "rstrip": false,
 
347
  "special": false
348
  },
349
  "151686": {
350
+ "content": "r11",
351
  "lstrip": false,
352
  "normalized": true,
353
  "rstrip": false,
 
355
  "special": false
356
  },
357
  "151687": {
358
+ "content": "r10d",
359
  "lstrip": false,
360
  "normalized": true,
361
  "rstrip": false,
 
363
  "special": false
364
  },
365
  "151688": {
366
+ "content": "c2",
367
  "lstrip": false,
368
  "normalized": true,
369
  "rstrip": false,
 
371
  "special": false
372
  },
373
  "151689": {
374
+ "content": "r15",
375
  "lstrip": false,
376
  "normalized": true,
377
  "rstrip": false,
 
379
  "special": false
380
  },
381
  "151690": {
382
+ "content": "r12d",
383
  "lstrip": false,
384
  "normalized": true,
385
  "rstrip": false,
 
387
  "special": false
388
  },
389
  "151691": {
390
+ "content": "c0",
391
  "lstrip": false,
392
  "normalized": true,
393
  "rstrip": false,
 
395
  "special": false
396
  },
397
  "151692": {
398
+ "content": "swi",
399
  "lstrip": false,
400
  "normalized": true,
401
  "rstrip": false,
 
403
  "special": false
404
  },
405
  "151693": {
406
+ "content": "r4",
407
  "lstrip": false,
408
  "normalized": true,
409
  "rstrip": false,
 
411
  "special": false
412
  },
413
  "151694": {
414
+ "content": "c4",
415
  "lstrip": false,
416
  "normalized": true,
417
  "rstrip": false,
 
419
  "special": false
420
  },
421
  "151695": {
422
+ "content": "r5",
423
  "lstrip": false,
424
  "normalized": true,
425
  "rstrip": false,
 
427
  "special": false
428
  },
429
  "151696": {
430
+ "content": "bgt",
431
  "lstrip": false,
432
  "normalized": true,
433
  "rstrip": false,
 
435
  "special": false
436
  },
437
  "151697": {
438
+ "content": "c3",
439
  "lstrip": false,
440
  "normalized": true,
441
  "rstrip": false,
 
443
  "special": false
444
  },
445
  "151698": {
446
+ "content": "r13",
447
  "lstrip": false,
448
  "normalized": true,
449
  "rstrip": false,
 
459
  "special": false
460
  },
461
  "151700": {
462
+ "content": "jle",
463
  "lstrip": false,
464
  "normalized": true,
465
  "rstrip": false,
 
467
  "special": false
468
  },
469
  "151701": {
470
+ "content": "r15d",
471
  "lstrip": false,
472
  "normalized": true,
473
  "rstrip": false,
 
475
  "special": false
476
  },
477
  "151702": {
478
+ "content": "c1",
479
  "lstrip": false,
480
  "normalized": true,
481
  "rstrip": false,
 
483
  "special": false
484
  },
485
  "151703": {
486
+ "content": "jg",
487
  "lstrip": false,
488
  "normalized": true,
489
  "rstrip": false,
 
491
  "special": false
492
  },
493
  "151704": {
494
+ "content": "r9",
495
  "lstrip": false,
496
  "normalized": true,
497
  "rstrip": false,
 
499
  "special": false
500
  },
501
  "151705": {
502
+ "content": "bne",
503
  "lstrip": false,
504
  "normalized": true,
505
  "rstrip": false,
 
507
  "special": false
508
  },
509
  "151706": {
510
+ "content": "r14d",
511
  "lstrip": false,
512
  "normalized": true,
513
  "rstrip": false,
 
515
  "special": false
516
  },
517
  "151707": {
518
+ "content": "c6",
519
  "lstrip": false,
520
  "normalized": true,
521
  "rstrip": false,
 
523
  "special": false
524
  },
525
  "151708": {
526
+ "content": "rsi",
527
  "lstrip": false,
528
  "normalized": true,
529
  "rstrip": false,
 
531
  "special": false
532
  },
533
  "151709": {
534
+ "content": "r1",
535
  "lstrip": false,
536
  "normalized": true,
537
  "rstrip": false,
 
539
  "special": false
540
  },
541
  "151710": {
542
+ "content": "r2",
543
  "lstrip": false,
544
  "normalized": true,
545
  "rstrip": false,
 
547
  "special": false
548
  },
549
  "151711": {
550
+ "content": "bge",
551
  "lstrip": false,
552
  "normalized": true,
553
  "rstrip": false,
 
555
  "special": false
556
  },
557
  "151712": {
558
+ "content": "rbx",
559
  "lstrip": false,
560
  "normalized": true,
561
  "rstrip": false,
 
563
  "special": false
564
  },
565
  "151713": {
566
+ "content": "r10",
567
  "lstrip": false,
568
  "normalized": true,
569
  "rstrip": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f1ba02a2e02308d1b6deb3630834904fece7e08080b4f5cbc9b2d5b9ac3347d
3
- size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab0a241b29d40fc001851d77ec37a1f2998091091d43fb118de8e722cce517c
3
+ size 5560