Iker committed on
Commit
91cb6cd
·
1 Parent(s): 4db19c9

Ensure utf8

Browse files

Not using utf8 by default should be illegal

Files changed (1) hide show
  1. dataset.py +6 -5
dataset.py CHANGED
@@ -27,7 +27,7 @@ class DatasetReader(IterableDataset):
27
  )
28
 
29
  def __iter__(self):
30
- file_itr = open(self.filename, "r")
31
  mapped_itr = map(self.preprocess, file_itr)
32
  return mapped_itr
33
 
@@ -56,10 +56,11 @@ class ParallelTextReader(IterableDataset):
56
  return pred, [gold]
57
 
58
  def __iter__(self):
59
- pred_itr = open(self.pred_path, "r")
60
- gold_itr = open(self.gold_path, "r")
61
- mapped_itr = map(self.preprocess, pred_itr, gold_itr)
62
- return mapped_itr
 
63
 
64
  def __len__(self):
65
  return self.num_sentences
 
27
  )
28
 
29
  def __iter__(self):
30
+ file_itr = open(self.filename, "r", encoding="utf8")
31
  mapped_itr = map(self.preprocess, file_itr)
32
  return mapped_itr
33
 
 
56
  return pred, [gold]
57
 
58
  def __iter__(self):
59
+ with open(self.pred_path, "r", encoding="utf8") as pred_itr, open(
60
+ self.gold_path, "r", encoding="utf8"
61
+ ) as gold_itr:
62
+ mapped_itr = map(self.preprocess, pred_itr, gold_itr)
63
+ return mapped_itr
64
 
65
  def __len__(self):
66
  return self.num_sentences