Spaces:

Rafa1986
/

Data-Analytics-Class

Sleeping

App Files Community

Rafa1986 committed on Mar 14

Commit

a64a105

verified ·

1 Parent(s): be26077

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -8

app.py CHANGED Viewed

@@ -23,8 +23,8 @@ def detect_language(text):
 openai.api_key = "YOUR_OPENAI_API_KEY"
 def extract_files_from_folder(folder_path):
-    """Scans a folder and its subfolders for PDF, TXT, and CSV files."""
-    extracted_files = {"pdf": [], "txt": [], "csv": []}
     for root, _, files in os.walk(folder_path):
         for file_name in files:
@@ -35,6 +35,8 @@ def extract_files_from_folder(folder_path):
                 extracted_files["txt"].append(file_path)
             elif file_name.endswith(".csv"):
                 extracted_files["csv"].append(file_path)
     return extracted_files
 def read_text_from_files(file_paths):
@@ -99,15 +101,12 @@ def get_answer(question, vector_db, corrected_exercises):
 def chatbot_interface(question):
     folder_path = "/mnt/data/Data Analitics/"
-    if not folder_path:
-        return "Please provide a folder path before asking a question."
     extracted_files = extract_files_from_folder(folder_path)
     text = get_text_from_pdf(extracted_files["pdf"]) + read_text_from_files(extracted_files["txt"]) + get_text_from_csv(extracted_files["csv"])
     if not text:
-        return "The folder does not contain valid PDF, TXT, or CSV files. Please upload supported file types."
     corrected_exercises = correct_exercises(text)
     vector_db = create_vector_database(text)
@@ -116,8 +115,7 @@ def chatbot_interface(question):
 # Gradio interface
 demo = gr.Interface(
     fn=chatbot_interface,
-    inputs=[gr.Textbox(label="Folder Path", placeholder="Enter the path to the folder containing the documents"),
-            gr.Textbox(label="Ask a question", placeholder="Type your question here...")],
     outputs=gr.Textbox(label="Answer")
 )

 openai.api_key = "YOUR_OPENAI_API_KEY"
 def extract_files_from_folder(folder_path):
+    """Scans a folder and its subfolders for PDF, TXT, CSV, and DOCX files."""
+    extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": []}
     for root, _, files in os.walk(folder_path):
         for file_name in files:
                 extracted_files["txt"].append(file_path)
             elif file_name.endswith(".csv"):
                 extracted_files["csv"].append(file_path)
+            elif file_name.endswith(".docx"):
+                extracted_files["docx"].append(file_path)
     return extracted_files
 def read_text_from_files(file_paths):
 def chatbot_interface(question):
     folder_path = "/mnt/data/Data Analitics/"
     extracted_files = extract_files_from_folder(folder_path)
     text = get_text_from_pdf(extracted_files["pdf"]) + read_text_from_files(extracted_files["txt"]) + get_text_from_csv(extracted_files["csv"])
     if not text:
+        return "The folder does not contain valid PDF, TXT, CSV, or DOCX files. Please upload supported file types."
     corrected_exercises = correct_exercises(text)
     vector_db = create_vector_database(text)
 # Gradio interface
 demo = gr.Interface(
     fn=chatbot_interface,
+    inputs=gr.Textbox(label="Ask a question", placeholder="Type your question here..."),
     outputs=gr.Textbox(label="Answer")
 )