Spaces:
Running
Running
Ensure utf8
Browse files
Not using utf8 by default should be illegal
- dataset.py +6 -5
dataset.py
CHANGED
@@ -27,7 +27,7 @@ class DatasetReader(IterableDataset):
|
|
27 |
)
|
28 |
|
29 |
def __iter__(self):
|
30 |
-
file_itr = open(self.filename, "r")
|
31 |
mapped_itr = map(self.preprocess, file_itr)
|
32 |
return mapped_itr
|
33 |
|
@@ -56,10 +56,11 @@ class ParallelTextReader(IterableDataset):
|
|
56 |
return pred, [gold]
|
57 |
|
58 |
def __iter__(self):
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
63 |
|
64 |
def __len__(self):
|
65 |
return self.num_sentences
|
|
|
27 |
)
|
28 |
|
29 |
def __iter__(self):
|
30 |
+
file_itr = open(self.filename, "r", encoding="utf8")
|
31 |
mapped_itr = map(self.preprocess, file_itr)
|
32 |
return mapped_itr
|
33 |
|
|
|
56 |
return pred, [gold]
|
57 |
|
58 |
def __iter__(self):
|
59 |
+
with open(self.pred_path, "r", encoding="utf8") as pred_itr, open(
|
60 |
+
self.gold_path, "r", encoding="utf8"
|
61 |
+
) as gold_itr:
|
62 |
+
mapped_itr = map(self.preprocess, pred_itr, gold_itr)
|
63 |
+
return mapped_itr
|
64 |
|
65 |
def __len__(self):
|
66 |
return self.num_sentences
|