Commit
·
38d7771
1
Parent(s):
705eb2e
Upload tokenizer
Browse files- tokenizer.json +150 -2
tokenizer.json
CHANGED
@@ -386,7 +386,81 @@
|
|
386 |
"Ġdaayim": 324,
|
387 |
"Ġyuxw": 325,
|
388 |
"Ġaloohl": 326,
|
389 |
-
"Ġbax": 327
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
390 |
},
|
391 |
"merges": [
|
392 |
"s t",
|
@@ -669,7 +743,81 @@
|
|
669 |
"Ġd aayim",
|
670 |
"Ġyu xw",
|
671 |
"Ġa loohl",
|
672 |
-
"Ġb ax"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
]
|
674 |
}
|
675 |
}
|
|
|
386 |
"Ġdaayim": 324,
|
387 |
"Ġyuxw": 325,
|
388 |
"Ġaloohl": 326,
|
389 |
+
"Ġbax": 327,
|
390 |
+
"Ġbaasx": 328,
|
391 |
+
"Ġligit": 329,
|
392 |
+
"Ġjok": 330,
|
393 |
+
"Ġsg": 331,
|
394 |
+
"Ġsi": 332,
|
395 |
+
"ĠSpain": 333,
|
396 |
+
"nakwhl": 334,
|
397 |
+
"Ġhehl": 335,
|
398 |
+
"Ġhediit": 336,
|
399 |
+
"diithl": 337,
|
400 |
+
"witxwit": 338,
|
401 |
+
"Ġjaphl": 339,
|
402 |
+
"nithl": 340,
|
403 |
+
"ytxwhl": 341,
|
404 |
+
"Ġxhlii": 342,
|
405 |
+
"Ġdaayimaahl": 343,
|
406 |
+
"Ġyuxwdiithl": 344,
|
407 |
+
"Ġbaasxi": 345,
|
408 |
+
"Nakwhl": 346,
|
409 |
+
"gwi": 347,
|
410 |
+
"ukwhl": 348,
|
411 |
+
"yukwhl": 349,
|
412 |
+
"ĠAk": 350,
|
413 |
+
"ĠAgwi": 351,
|
414 |
+
"ĠAgwiyukwhl": 352,
|
415 |
+
"BM": 353,
|
416 |
+
"De": 354,
|
417 |
+
"Gi": 355,
|
418 |
+
"IBM": 356,
|
419 |
+
"aw": 357,
|
420 |
+
"ail": 358,
|
421 |
+
"ce": 359,
|
422 |
+
"ff": 360,
|
423 |
+
"gee": 361,
|
424 |
+
"it": 362,
|
425 |
+
"iwaa": 363,
|
426 |
+
"ice": 364,
|
427 |
+
"jit": 365,
|
428 |
+
"ljit": 366,
|
429 |
+
"mar": 367,
|
430 |
+
"mail": 368,
|
431 |
+
"nmar": 369,
|
432 |
+
"oxs": 370,
|
433 |
+
"off": 371,
|
434 |
+
"si": 372,
|
435 |
+
"wan": 373,
|
436 |
+
"way": 374,
|
437 |
+
"yo": 375,
|
438 |
+
"ĠDe": 376,
|
439 |
+
"ĠGi": 377,
|
440 |
+
"ĠIBM": 378,
|
441 |
+
"Ġmail": 379,
|
442 |
+
"Ġoff": 380,
|
443 |
+
"niiwan": 381,
|
444 |
+
"niiyo": 382,
|
445 |
+
"xsiwaa": 383,
|
446 |
+
"Ġsaw": 384,
|
447 |
+
"nix": 385,
|
448 |
+
"Ġwok": 386,
|
449 |
+
"atdiit": 387,
|
450 |
+
"̲.\"": 388,
|
451 |
+
"oosun": 389,
|
452 |
+
"ĠAp": 390,
|
453 |
+
"Ġamxsiwaa": 391,
|
454 |
+
"Ġaks": 392,
|
455 |
+
"geenix": 393,
|
456 |
+
"nmark": 394,
|
457 |
+
"oxsxw": 395,
|
458 |
+
"wayi": 396,
|
459 |
+
"ĠDenmark": 397,
|
460 |
+
"ĠGigeenix": 398,
|
461 |
+
"Ġoffice": 399,
|
462 |
+
"Ġsawatdiit": 400,
|
463 |
+
"ytxw": 401
|
464 |
},
|
465 |
"merges": [
|
466 |
"s t",
|
|
|
743 |
"Ġd aayim",
|
744 |
"Ġyu xw",
|
745 |
"Ġa loohl",
|
746 |
+
"Ġb ax",
|
747 |
+
"Ġb aasx",
|
748 |
+
"Ġligi t",
|
749 |
+
"Ġj ok",
|
750 |
+
"Ġs g",
|
751 |
+
"Ġs i",
|
752 |
+
"ĠS pain",
|
753 |
+
"na kwhl",
|
754 |
+
"Ġhe hl",
|
755 |
+
"Ġhe diit",
|
756 |
+
"diit hl",
|
757 |
+
"wit xwit",
|
758 |
+
"Ġja phl",
|
759 |
+
"nit hl",
|
760 |
+
"yt xwhl",
|
761 |
+
"Ġxhl ii",
|
762 |
+
"Ġdaayim aahl",
|
763 |
+
"Ġyuxw diithl",
|
764 |
+
"Ġbaasx i",
|
765 |
+
"N akwhl",
|
766 |
+
"g wi",
|
767 |
+
"u kwhl",
|
768 |
+
"y ukwhl",
|
769 |
+
"ĠA k",
|
770 |
+
"ĠA gwi",
|
771 |
+
"ĠAgwi yukwhl",
|
772 |
+
"B M",
|
773 |
+
"D e",
|
774 |
+
"G i",
|
775 |
+
"I BM",
|
776 |
+
"a w",
|
777 |
+
"a il",
|
778 |
+
"c e",
|
779 |
+
"f f",
|
780 |
+
"g ee",
|
781 |
+
"i t",
|
782 |
+
"i waa",
|
783 |
+
"i ce",
|
784 |
+
"j it",
|
785 |
+
"l jit",
|
786 |
+
"m ar",
|
787 |
+
"m ail",
|
788 |
+
"n mar",
|
789 |
+
"o xs",
|
790 |
+
"o ff",
|
791 |
+
"s i",
|
792 |
+
"w an",
|
793 |
+
"w ay",
|
794 |
+
"y o",
|
795 |
+
"Ġ De",
|
796 |
+
"Ġ Gi",
|
797 |
+
"Ġ IBM",
|
798 |
+
"Ġ mail",
|
799 |
+
"Ġ off",
|
800 |
+
"nii wan",
|
801 |
+
"nii yo",
|
802 |
+
"xs iwaa",
|
803 |
+
"Ġs aw",
|
804 |
+
"ni x",
|
805 |
+
"Ġw ok",
|
806 |
+
"at diit",
|
807 |
+
"̲. \"",
|
808 |
+
"oos un",
|
809 |
+
"ĠA p",
|
810 |
+
"Ġam xsiwaa",
|
811 |
+
"Ġak s",
|
812 |
+
"gee nix",
|
813 |
+
"nmar k",
|
814 |
+
"oxs xw",
|
815 |
+
"way i",
|
816 |
+
"ĠDe nmark",
|
817 |
+
"ĠGi geenix",
|
818 |
+
"Ġoff ice",
|
819 |
+
"Ġsaw atdiit",
|
820 |
+
"yt xw"
|
821 |
]
|
822 |
}
|
823 |
}
|