Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,9 @@ def extract_files_from_folder(folder_path):
|
|
26 |
"""Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
|
27 |
extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
|
28 |
|
29 |
-
|
|
|
|
|
30 |
for file_name in files:
|
31 |
file_path = os.path.join(root, file_name)
|
32 |
if file_name.endswith(".pdf"):
|
@@ -37,12 +39,15 @@ def extract_files_from_folder(folder_path):
|
|
37 |
extracted_files["csv"].append(file_path)
|
38 |
elif file_name.endswith(".docx"):
|
39 |
extracted_files["docx"].append(file_path)
|
|
|
|
|
40 |
return extracted_files
|
41 |
|
42 |
def read_text_from_files(file_paths):
|
43 |
"""Reads text content from a list of files."""
|
44 |
text = ""
|
45 |
for file_path in file_paths:
|
|
|
46 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
47 |
text += file.read() + "\n"
|
48 |
return text
|
@@ -50,6 +55,7 @@ def read_text_from_files(file_paths):
|
|
50 |
def get_text_from_pdf(pdf_files):
|
51 |
text = ""
|
52 |
for pdf_path in pdf_files:
|
|
|
53 |
with open(pdf_path, "rb") as pdf_file:
|
54 |
reader = PyPDF2.PdfReader(pdf_file)
|
55 |
for page in reader.pages:
|
@@ -59,6 +65,7 @@ def get_text_from_pdf(pdf_files):
|
|
59 |
def get_text_from_csv(csv_files):
|
60 |
text = ""
|
61 |
for csv_path in csv_files:
|
|
|
62 |
df = pd.read_csv(csv_path)
|
63 |
text += df.to_string() + "\n"
|
64 |
return text
|
@@ -119,4 +126,4 @@ demo = gr.Interface(
|
|
119 |
outputs=gr.Textbox(label="Answer")
|
120 |
)
|
121 |
|
122 |
-
demo.launch()
|
|
|
26 |
"""Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
|
27 |
extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
|
28 |
|
29 |
+
print(f"Scanning folder: {folder_path}")
|
30 |
+
for root, subdirs, files in os.walk(folder_path):
|
31 |
+
print(f"Checking folder: {root}") # Debugging log for subfolders
|
32 |
for file_name in files:
|
33 |
file_path = os.path.join(root, file_name)
|
34 |
if file_name.endswith(".pdf"):
|
|
|
39 |
extracted_files["csv"].append(file_path)
|
40 |
elif file_name.endswith(".docx"):
|
41 |
extracted_files["docx"].append(file_path)
|
42 |
+
|
43 |
+
print("Files found:", extracted_files) # Debugging log
|
44 |
return extracted_files
|
45 |
|
46 |
def read_text_from_files(file_paths):
|
47 |
"""Reads text content from a list of files."""
|
48 |
text = ""
|
49 |
for file_path in file_paths:
|
50 |
+
print(f"Reading text file: {file_path}") # Debugging log
|
51 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
52 |
text += file.read() + "\n"
|
53 |
return text
|
|
|
55 |
def get_text_from_pdf(pdf_files):
|
56 |
text = ""
|
57 |
for pdf_path in pdf_files:
|
58 |
+
print(f"Reading PDF file: {pdf_path}") # Debugging log
|
59 |
with open(pdf_path, "rb") as pdf_file:
|
60 |
reader = PyPDF2.PdfReader(pdf_file)
|
61 |
for page in reader.pages:
|
|
|
65 |
def get_text_from_csv(csv_files):
|
66 |
text = ""
|
67 |
for csv_path in csv_files:
|
68 |
+
print(f"Reading CSV file: {csv_path}") # Debugging log
|
69 |
df = pd.read_csv(csv_path)
|
70 |
text += df.to_string() + "\n"
|
71 |
return text
|
|
|
126 |
outputs=gr.Textbox(label="Answer")
|
127 |
)
|
128 |
|
129 |
+
demo.launch()
|