Spaces:
Sleeping
Sleeping
Update preprocessing.py
Browse files - preprocessing.py +1 -1
preprocessing.py
CHANGED
@@ -196,7 +196,7 @@ def read_file(file_path):
|
|
196 |
reader = pypdf.PdfReader(file_path)
|
197 |
raw_data = ""
|
198 |
for idx in range(len(reader.pages)):
|
199 |
-
raw_data +=
|
200 |
if not is_meaningful(raw_data):
|
201 |
print("this text not supported")
|
202 |
raise ValueError("Unsupported file format.")
|
|
|
196 |
reader = pypdf.PdfReader(file_path)
|
197 |
raw_data = ""
|
198 |
for idx in range(len(reader.pages)):
|
199 |
+
raw_data += reader.pages[idx].extract_text()
|
200 |
if not is_meaningful(raw_data):
|
201 |
print("this text not supported")
|
202 |
raise ValueError("Unsupported file format.")
|