Spaces:

Rafa1986
/

Data-Analytics-Class

Sleeping

App Files Files Community

Rafa1986 committed on Mar 14

Commit

5baddf7

verified ·

1 Parent(s): fbe2154

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -25

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from io import BytesIO
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import OpenAI
 def detect_language(text):
     """Detects the language of the input text using OpenAI."""
@@ -23,20 +25,34 @@ def detect_language(text):
 # Set up OpenAI API key (replace with your key)
 openai.api_key = "YOUR_OPENAI_API_KEY"
def extract_files_from_zip(zip_file):
    """Extracts PDF, TXT, and CSV files from a ZIP archive.

    Args:
        zip_file: Path to the archive, or a binary file-like object that
            ``zipfile.ZipFile`` can read.

    Returns:
        A dict with the keys "pdf", "txt" and "csv". Each value is a list of
        ``BytesIO`` buffers, one per matching archive member, in archive order.
    """
    extracted_files = {"pdf": [], "txt": [], "csv": []}
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            # Compare against the lower-cased name so members such as
            # "REPORT.PDF" or "data.Csv" are not silently ignored.
            lowered = file_name.lower()
            for kind, buffers in extracted_files.items():
                if lowered.endswith("." + kind):
                    # Only members that are actually kept get opened and read.
                    with zip_ref.open(file_name) as file:
                        buffers.append(BytesIO(file.read()))
                    break
    return extracted_files
 def get_text_from_pdf(pdf_files):
     text = ""
     for pdf in pdf_files:
@@ -65,7 +81,7 @@ def create_vector_database(text):
     vector_db = FAISS.from_texts(texts, embeddings)
     return vector_db
-def get_answer(question, vector_db):
     retriever = vector_db.as_retriever()
     docs = retriever.get_relevant_documents(question)
@@ -77,32 +93,33 @@ def get_answer(question, vector_db):
     response = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=[
-            {"role": "system", "content": f"You are a Data Analytics assistant. Answer in {language}. Use the documents to answer questions."},
-            {"role": "user", "content": question + "\n\nBased on the following context:\n" + context}
         ]
     )
     return response["choices"][0]["message"]["content"]
def chatbot_interface(zip_file, question):
    """Answer ``question`` from the documents inside an uploaded ZIP archive.

    The PDF, TXT and CSV members of the archive are converted to text, the
    text is indexed in a vector database, and the question is answered
    against that index.

    Args:
        zip_file: The uploaded archive as handed over by the UI; a falsy value
            means nothing was uploaded.
        question: The user's question.

    Returns:
        The answer produced by ``get_answer``, or a prompt to upload a ZIP
        file when no text could be obtained.
    """
    upload_prompt = "Please upload a ZIP file containing PDFs, TXTs, or CSVs before asking questions."

    # Without an upload there is no text at all, so the prompt is returned.
    if not zip_file:
        return upload_prompt

    archive = extract_files_from_zip(zip_file)
    text = "".join([
        get_text_from_pdf(archive["pdf"]),
        get_text_from_txt(archive["txt"]),
        get_text_from_csv(archive["csv"]),
    ])
    if text:
        return get_answer(question, create_vector_database(text))
    return upload_prompt
 # Gradio interface
 demo = gr.Interface(
     fn=chatbot_interface,
-    inputs=[gr.File(file_types=[".zip"]),
-            gr.Textbox(placeholder="Type your question here...")],
-    outputs=gr.Textbox()
 )
 demo.launch()

 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import OpenAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 def detect_language(text):
     """Detects the language of the input text using OpenAI."""
 # Set up OpenAI API key (replace with your key)
 openai.api_key = "YOUR_OPENAI_API_KEY"
def extract_files_from_zip(zip_path):
    """Extracts PDF, TXT, and CSV files from a ZIP archive, including subfolders.

    Args:
        zip_path: Path to the archive, or a binary file-like object that
            ``zipfile.ZipFile`` can read.

    Returns:
        A dict with the keys "pdf", "txt" and "csv". Each value is a list of
        ``BytesIO`` buffers holding the raw bytes of one matching member, in
        archive order.
    """
    extracted_files = {"pdf": [], "txt": [], "csv": []}
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            # Archives created on macOS carry resource-fork stubs such as
            # "__MACOSX/._report.pdf"; they share the extension but are not
            # real documents, so they are skipped.
            if file_name.startswith("__MACOSX/"):
                continue
            # Compare case-insensitively so "REPORT.PDF" is not dropped.
            lowered = file_name.lower()
            for kind, buffers in extracted_files.items():
                if lowered.endswith("." + kind):
                    buffers.append(BytesIO(zip_ref.read(file_name)))
                    break
    return extracted_files
def analyze_text(text):
    """Ask OpenAI for key points, links, and complementary information in ``text``.

    Args:
        text: The document text, sent as the user message.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped.
    """
    # NOTE(review): the whole text goes out in a single request; very long
    # inputs may exceed the model's context limit - confirm against callers.
    system_prompt = "Analyze this document and extract key points, links, and complementary information."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"].strip()
 def get_text_from_pdf(pdf_files):
     text = ""
     for pdf in pdf_files:
     vector_db = FAISS.from_texts(texts, embeddings)
     return vector_db
+def get_answer(question, vector_db, analysis):
     retriever = vector_db.as_retriever()
     docs = retriever.get_relevant_documents(question)
     response = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=[
+            {"role": "system", "content": f"You are a Data Analytics assistant. Answer in {language}. Use the documents and their analyses to answer questions."},
+            {"role": "user", "content": question + "\n\nBased on the following document content:\n" + context + "\n\nAdditional insights:\n" + analysis}
         ]
     )
     return response["choices"][0]["message"]["content"]
def chatbot_interface(zip_file_path, question):
    """Answer ``question`` using the documents inside an uploaded ZIP archive.

    The PDF, TXT and CSV members are converted to text; the text is analyzed
    with ``analyze_text`` and indexed with ``create_vector_database``; both
    are then handed to ``get_answer`` together with the question.

    Args:
        zip_file_path: The uploaded archive as passed in by the UI; it is
            forwarded unchanged to ``extract_files_from_zip``. A falsy value
            means nothing was uploaded.
        question: The user's question.

    Returns:
        The answer text, or a user-facing message when no file was uploaded,
        the upload is not a ZIP archive, or it holds no supported documents.
    """
    import zipfile  # local import so the except clause below is self-contained

    if not zip_file_path:
        return "Please upload a ZIP file before asking a question."

    try:
        extracted_files = extract_files_from_zip(zip_file_path)
    except zipfile.BadZipFile:
        # The file input is not restricted to .zip, so any file can arrive
        # here; report it instead of letting the exception reach the UI.
        return "The uploaded file is not a valid ZIP archive. Please upload a ZIP file."

    text = (
        get_text_from_pdf(extracted_files["pdf"])
        + get_text_from_txt(extracted_files["txt"])
        + get_text_from_csv(extracted_files["csv"])
    )
    if not text:
        return "The ZIP file does not contain valid PDF, TXT, or CSV files. Please upload supported file types."

    analysis = analyze_text(text)
    vector_db = create_vector_database(text)
    return get_answer(question, vector_db, analysis)
 # Gradio interface
 demo = gr.Interface(
     fn=chatbot_interface,
+    inputs=[gr.File(label="Upload ZIP File"),
+            gr.Textbox(label="Ask a question", placeholder="Type your question here...")],
+    outputs=gr.Textbox(label="Answer")
 )
 demo.launch()