Spaces:

Rafa1986
/

Data-Analytics-Class

Sleeping

App Files Community

Rafa1986 committed on Mar 14

Commit

8b5a642

verified ·

1 Parent(s): a64a105

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -2

app.py CHANGED Viewed

@@ -26,7 +26,9 @@ def extract_files_from_folder(folder_path):
     """Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
     extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
-    for root, _, files in os.walk(folder_path):
         for file_name in files:
             file_path = os.path.join(root, file_name)
             if file_name.endswith(".pdf"):
@@ -37,12 +39,15 @@ def extract_files_from_folder(folder_path):
                 extracted_files["csv"].append(file_path)
             elif file_name.endswith(".docx"):
                 extracted_files["docx"].append(file_path)
     return extracted_files
 def read_text_from_files(file_paths):
     """Reads text content from a list of files."""
     text = ""
     for file_path in file_paths:
         with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
             text += file.read() + "\n"
     return text
@@ -50,6 +55,7 @@ def read_text_from_files(file_paths):
 def get_text_from_pdf(pdf_files):
     text = ""
     for pdf_path in pdf_files:
         with open(pdf_path, "rb") as pdf_file:
             reader = PyPDF2.PdfReader(pdf_file)
             for page in reader.pages:
@@ -59,6 +65,7 @@ def get_text_from_pdf(pdf_files):
 def get_text_from_csv(csv_files):
     text = ""
     for csv_path in csv_files:
         df = pd.read_csv(csv_path)
         text += df.to_string() + "\n"
     return text
@@ -119,4 +126,4 @@ demo = gr.Interface(
     outputs=gr.Textbox(label="Answer")
 )
-demo.launch()

     """Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
     extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
+    print(f"Scanning folder: {folder_path}")
+    for root, subdirs, files in os.walk(folder_path):
+        print(f"Checking folder: {root}")  # Debugging log for subfolders
         for file_name in files:
             file_path = os.path.join(root, file_name)
             if file_name.endswith(".pdf"):
                 extracted_files["csv"].append(file_path)
             elif file_name.endswith(".docx"):
                 extracted_files["docx"].append(file_path)
+    print("Files found:", extracted_files)  # Debugging log
     return extracted_files
 def read_text_from_files(file_paths):
     """Reads text content from a list of files."""
     text = ""
     for file_path in file_paths:
+        print(f"Reading text file: {file_path}")  # Debugging log
         with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
             text += file.read() + "\n"
     return text
 def get_text_from_pdf(pdf_files):
     text = ""
     for pdf_path in pdf_files:
+        print(f"Reading PDF file: {pdf_path}")  # Debugging log
         with open(pdf_path, "rb") as pdf_file:
             reader = PyPDF2.PdfReader(pdf_file)
             for page in reader.pages:
 def get_text_from_csv(csv_files):
     text = ""
     for csv_path in csv_files:
+        print(f"Reading CSV file: {csv_path}")  # Debugging log
         df = pd.read_csv(csv_path)
         text += df.to_string() + "\n"
     return text
     outputs=gr.Textbox(label="Answer")
 )
+demo.launch()