atiwari751 committed on
Commit
cae9627
·
1 Parent(s): 1e8ebcb

eng test re

Browse files
Files changed (1) hide show
  1. BPE.py +4 -4
BPE.py CHANGED
@@ -14,7 +14,7 @@ tokens = re.findall(gpt2pat, text)
14
 
15
  # Convert tokens to a list of integers in range 0..255 for convenience
16
  tokens = [ord(char) for token in tokens for char in token]
17
- print(tokens)
18
 
19
  def get_stats(ids):
20
  counts = {}
@@ -62,9 +62,9 @@ if __name__ == "__main__":
62
  print("length of tokens:", len(tokens))
63
 
64
  # Run BPE and save results
65
- #merges, ids, num_merges = perform_bpe()
66
 
67
  # Save merges and vocab to a file
68
- #with open('bpe_results.pkl', 'wb') as f:
69
- #pickle.dump((merges, ids, num_merges), f)
70
 
 
14
 
15
  # Convert tokens to a list of integers in range 0..255 for convenience
16
  tokens = [ord(char) for token in tokens for char in token]
17
+ #print(tokens)
18
 
19
  def get_stats(ids):
20
  counts = {}
 
62
  print("length of tokens:", len(tokens))
63
 
64
  # Run BPE and save results
65
+ merges, ids, num_merges = perform_bpe()
66
 
67
  # Save merges and vocab to a file
68
+ with open('bpe_results.pkl', 'wb') as f:
69
+ pickle.dump((merges, ids, num_merges), f)
70