Danielrahmai1991 committed on
Commit
53ab711
·
verified ·
1 Parent(s): 69fb57b

Update preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +1 -1
preprocessing.py CHANGED
@@ -196,7 +196,7 @@ def read_file(file_path):
196
  reader = pypdf.PdfReader(file_path)
197
  raw_data = ""
198
  for idx in range(len(reader.pages)):
199
- raw_data += book_preprocessing(reader.pages[idx].extract_text())
200
  if not is_meaningful(raw_data):
201
  print("this text not supported")
202
  raise ValueError("Unsupported file format.")
 
196
  reader = pypdf.PdfReader(file_path)
197
  raw_data = ""
198
  for idx in range(len(reader.pages)):
199
+ raw_data += reader.pages[idx].extract_text()
200
  if not is_meaningful(raw_data):
201
  print("this text not supported")
202
  raise ValueError("Unsupported file format.")