Rafa1986 committed on
Commit
8b5a642
·
verified ·
1 Parent(s): a64a105

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -26,7 +26,9 @@ def extract_files_from_folder(folder_path):
26
  """Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
27
  extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
28
 
29
- for root, _, files in os.walk(folder_path):
 
 
30
  for file_name in files:
31
  file_path = os.path.join(root, file_name)
32
  if file_name.endswith(".pdf"):
@@ -37,12 +39,15 @@ def extract_files_from_folder(folder_path):
37
  extracted_files["csv"].append(file_path)
38
  elif file_name.endswith(".docx"):
39
  extracted_files["docx"].append(file_path)
 
 
40
  return extracted_files
41
 
42
  def read_text_from_files(file_paths):
43
  """Reads text content from a list of files."""
44
  text = ""
45
  for file_path in file_paths:
 
46
  with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
47
  text += file.read() + "\n"
48
  return text
@@ -50,6 +55,7 @@ def read_text_from_files(file_paths):
50
  def get_text_from_pdf(pdf_files):
51
  text = ""
52
  for pdf_path in pdf_files:
 
53
  with open(pdf_path, "rb") as pdf_file:
54
  reader = PyPDF2.PdfReader(pdf_file)
55
  for page in reader.pages:
@@ -59,6 +65,7 @@ def get_text_from_pdf(pdf_files):
59
  def get_text_from_csv(csv_files):
60
  text = ""
61
  for csv_path in csv_files:
 
62
  df = pd.read_csv(csv_path)
63
  text += df.to_string() + "\n"
64
  return text
@@ -119,4 +126,4 @@ demo = gr.Interface(
119
  outputs=gr.Textbox(label="Answer")
120
  )
121
 
122
- demo.launch()
 
26
  """Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
27
  extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
28
 
29
+ print(f"Scanning folder: {folder_path}")
30
+ for root, subdirs, files in os.walk(folder_path):
31
+ print(f"Checking folder: {root}") # Debugging log for subfolders
32
  for file_name in files:
33
  file_path = os.path.join(root, file_name)
34
  if file_name.endswith(".pdf"):
 
39
  extracted_files["csv"].append(file_path)
40
  elif file_name.endswith(".docx"):
41
  extracted_files["docx"].append(file_path)
42
+
43
+ print("Files found:", extracted_files) # Debugging log
44
  return extracted_files
45
 
46
  def read_text_from_files(file_paths):
47
  """Reads text content from a list of files."""
48
  text = ""
49
  for file_path in file_paths:
50
+ print(f"Reading text file: {file_path}") # Debugging log
51
  with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
52
  text += file.read() + "\n"
53
  return text
 
55
  def get_text_from_pdf(pdf_files):
56
  text = ""
57
  for pdf_path in pdf_files:
58
+ print(f"Reading PDF file: {pdf_path}") # Debugging log
59
  with open(pdf_path, "rb") as pdf_file:
60
  reader = PyPDF2.PdfReader(pdf_file)
61
  for page in reader.pages:
 
65
  def get_text_from_csv(csv_files):
66
  text = ""
67
  for csv_path in csv_files:
68
+ print(f"Reading CSV file: {csv_path}") # Debugging log
69
  df = pd.read_csv(csv_path)
70
  text += df.to_string() + "\n"
71
  return text
 
126
  outputs=gr.Textbox(label="Answer")
127
  )
128
 
129
+ demo.launch()