Update app.py
app.py
CHANGED
@@ -4,6 +4,7 @@ import PyPDF2
 import pandas as pd
 import openai
 import docx
+import requests
 from docx import Document
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
@@ -45,13 +46,31 @@ def extract_files_from_folder(folder_path):
     print("Files found:", extracted_files)  # Debugging log
     return extracted_files
 
+def extract_links_from_text(text):
+    """Extracts links from text files and fetches their content."""
+    import re
+    links = re.findall(r'https?://\S+', text)
+    extracted_content = ""
+
+    for link in links:
+        try:
+            response = requests.get(link, timeout=5)
+            if response.status_code == 200:
+                extracted_content += f"\n[Extracted from {link}]\n" + response.text[:1000]  # Limit to first 1000 chars
+        except requests.exceptions.RequestException:
+            extracted_content += f"\n[Could not access {link}]\n"
+
+    return extracted_content
+
 def read_text_from_files(file_paths):
     """Reads text content from a list of files."""
     text = ""
     for file_path in file_paths:
         print(f"Reading text file: {file_path}")  # Debugging log
         with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
-            text += file.read() + "\n"
+            file_text = file.read()
+            text += file_text + "\n"
+            text += extract_links_from_text(file_text)  # Extract and add web content
     return text
 
 def get_text_from_pdf(pdf_files):
@@ -103,26 +122,8 @@ def correct_exercises(text):
     )
     return response["choices"][0]["message"]["content"].strip()
 
-def get_answer(question, vector_db, corrected_exercises):
-    retriever = vector_db.as_retriever()
-    docs = retriever.get_relevant_documents(question)
-
-    if not docs:
-        return "I could not find the answer in the documents. Do you want me to search external sources?"
-
-    context = "\n".join([doc.page_content for doc in docs])
-    language = detect_language(question)
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": f"You are a Data Analytics assistant. Answer in {language}. Use the documents to answer questions. Also, use the corrected exercises if relevant."},
-            {"role": "user", "content": question + "\n\nBased on the following document content:\n" + context + "\n\nCorrected Exercises:\n" + corrected_exercises}
-        ]
-    )
-    return response["choices"][0]["message"]["content"]
-
 def chatbot_interface(question):
-    folder_path = "/mnt/data/"
+    folder_path = "/mnt/data/New_Data_Analytics/"
     extracted_files = extract_files_from_folder(folder_path)
 
     text = get_text_from_pdf(extracted_files["pdf"]) + read_text_from_files(extracted_files["txt"]) + get_text_from_csv(extracted_files["csv"]) + get_text_from_docx(extracted_files["docx"])
@@ -141,4 +142,4 @@ demo = gr.Interface(
     outputs=gr.Textbox(label="Answer")
 )
 
-demo.launch()
+demo.launch()
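For a quick standalone check of the helper this commit adds, the sketch below copies its logic into a self-contained script; the sample input string is hypothetical and only `requests` needs to be installed.

import re
import requests

def extract_links_from_text(text):
    """Same logic as the helper added in this commit."""
    links = re.findall(r'https?://\S+', text)
    extracted_content = ""
    for link in links:
        try:
            # Bounded fetch: 5-second timeout, keep only the first 1000 characters
            response = requests.get(link, timeout=5)
            if response.status_code == 200:
                extracted_content += f"\n[Extracted from {link}]\n" + response.text[:1000]
        except requests.exceptions.RequestException:
            extracted_content += f"\n[Could not access {link}]\n"
    return extracted_content

# Hypothetical sample input; any text containing http(s) URLs works.
print(extract_links_from_text("Course notes: https://example.com/syllabus"))

Note the trade-offs in the committed version: the regex `https?://\S+` will also capture trailing punctuation stuck to a URL, and `response.text[:1000]` returns raw HTML rather than visible page text, but both keep the fetch simple and bounded.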