Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,7 +26,9 @@ def extract_files_from_folder(folder_path):
|
|
| 26 |
"""Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
|
| 27 |
extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
for file_name in files:
|
| 31 |
file_path = os.path.join(root, file_name)
|
| 32 |
if file_name.endswith(".pdf"):
|
|
@@ -37,12 +39,15 @@ def extract_files_from_folder(folder_path):
|
|
| 37 |
extracted_files["csv"].append(file_path)
|
| 38 |
elif file_name.endswith(".docx"):
|
| 39 |
extracted_files["docx"].append(file_path)
|
|
|
|
|
|
|
| 40 |
return extracted_files
|
| 41 |
|
| 42 |
def read_text_from_files(file_paths):
|
| 43 |
"""Reads text content from a list of files."""
|
| 44 |
text = ""
|
| 45 |
for file_path in file_paths:
|
|
|
|
| 46 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
| 47 |
text += file.read() + "\n"
|
| 48 |
return text
|
|
@@ -50,6 +55,7 @@ def read_text_from_files(file_paths):
|
|
| 50 |
def get_text_from_pdf(pdf_files):
|
| 51 |
text = ""
|
| 52 |
for pdf_path in pdf_files:
|
|
|
|
| 53 |
with open(pdf_path, "rb") as pdf_file:
|
| 54 |
reader = PyPDF2.PdfReader(pdf_file)
|
| 55 |
for page in reader.pages:
|
|
@@ -59,6 +65,7 @@ def get_text_from_pdf(pdf_files):
|
|
| 59 |
def get_text_from_csv(csv_files):
|
| 60 |
text = ""
|
| 61 |
for csv_path in csv_files:
|
|
|
|
| 62 |
df = pd.read_csv(csv_path)
|
| 63 |
text += df.to_string() + "\n"
|
| 64 |
return text
|
|
@@ -119,4 +126,4 @@ demo = gr.Interface(
|
|
| 119 |
outputs=gr.Textbox(label="Answer")
|
| 120 |
)
|
| 121 |
|
| 122 |
-
demo.launch()
|
|
|
|
| 26 |
"""Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
|
| 27 |
extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
|
| 28 |
|
| 29 |
+
print(f"Scanning folder: {folder_path}")
|
| 30 |
+
for root, subdirs, files in os.walk(folder_path):
|
| 31 |
+
print(f"Checking folder: {root}") # Debugging log for subfolders
|
| 32 |
for file_name in files:
|
| 33 |
file_path = os.path.join(root, file_name)
|
| 34 |
if file_name.endswith(".pdf"):
|
|
|
|
| 39 |
extracted_files["csv"].append(file_path)
|
| 40 |
elif file_name.endswith(".docx"):
|
| 41 |
extracted_files["docx"].append(file_path)
|
| 42 |
+
|
| 43 |
+
print("Files found:", extracted_files) # Debugging log
|
| 44 |
return extracted_files
|
| 45 |
|
| 46 |
def read_text_from_files(file_paths):
|
| 47 |
"""Reads text content from a list of files."""
|
| 48 |
text = ""
|
| 49 |
for file_path in file_paths:
|
| 50 |
+
print(f"Reading text file: {file_path}") # Debugging log
|
| 51 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
| 52 |
text += file.read() + "\n"
|
| 53 |
return text
|
|
|
|
| 55 |
def get_text_from_pdf(pdf_files):
|
| 56 |
text = ""
|
| 57 |
for pdf_path in pdf_files:
|
| 58 |
+
print(f"Reading PDF file: {pdf_path}") # Debugging log
|
| 59 |
with open(pdf_path, "rb") as pdf_file:
|
| 60 |
reader = PyPDF2.PdfReader(pdf_file)
|
| 61 |
for page in reader.pages:
|
|
|
|
| 65 |
def get_text_from_csv(csv_files):
|
| 66 |
text = ""
|
| 67 |
for csv_path in csv_files:
|
| 68 |
+
print(f"Reading CSV file: {csv_path}") # Debugging log
|
| 69 |
df = pd.read_csv(csv_path)
|
| 70 |
text += df.to_string() + "\n"
|
| 71 |
return text
|
|
|
|
| 126 |
outputs=gr.Textbox(label="Answer")
|
| 127 |
)
|
| 128 |
|
| 129 |
+
demo.launch()
|