jon-tow committed on
Commit
3c66e0d
·
1 Parent(s): 8593ce9

fix: remove `print` debug statements

Browse files
Files changed (1) hide show
  1. tokenization_arcade100k.py +0 -4
tokenization_arcade100k.py CHANGED
@@ -78,10 +78,6 @@ def _arcade100k(vocab_file: str):
78
  for i, t in enumerate(SPECIAL_TOKENS_NAMES)
79
  }
80
 
81
- print(len(mergeable_ranks))
82
- print(len(SPECIAL_TOKENS))
83
- print(len(mergeable_ranks) + len(SPECIAL_TOKENS))
84
-
85
  return {
86
  "name": NAME,
87
  "pat_str": r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""",
 
78
  for i, t in enumerate(SPECIAL_TOKENS_NAMES)
79
  }
80
 
 
 
 
 
81
  return {
82
  "name": NAME,
83
  "pat_str": r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""",