Spaces:

Rafa1986
/

Data-Analytics-Class

Sleeping

App Files Community

Rafa1986 committed on Mar 15

Commit

04d8315

verified ·

1 Parent(s): d60623e

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -18

app.py CHANGED Viewed

@@ -4,17 +4,14 @@ import PyPDF2
 import pandas as pd
 import docx
 import json
-import requests
 from docx import Document
-from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from transformers import pipeline
 # Configurar Hugging Face API Token
 HF_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
-# Carregar o modelo Mistral 7B gratuitamente do Hugging Face
-chatbot_pipeline = pipeline("text-generation", model="tiiuae/falcon-7b-instruct", token=HF_API_TOKEN)
 def extract_files_from_folder(folder_path):
     """Scans a folder for PDF, TXT, CSV, DOCX, and IPYNB files."""
@@ -66,28 +63,17 @@ def get_text_from_docx(docx_files):
             text += para.text + "\n"
     return text
-def get_text_from_ipynb(ipynb_files):
-    text = ""
-    for ipynb_path in ipynb_files:
-        with open(ipynb_path, "r", encoding="utf-8", errors="ignore") as file:
-            content = json.load(file)
-            for cell in content.get("cells", []):
-                if cell.get("cell_type") in ["markdown", "code"]:
-                    text += "\n".join(cell.get("source", [])) + "\n"
-    return text
 def combine_text_from_files(extracted_files):
     text = (
         get_text_from_pdf(extracted_files["pdf"]) +
         read_text_from_files(extracted_files["txt"]) +
         get_text_from_csv(extracted_files["csv"]) +
-        get_text_from_docx(extracted_files["docx"]) +
-        get_text_from_ipynb(extracted_files["ipynb"])
     )
     return text
 def generate_response(question, text):
-    """Uses the Mistral 7B model to answer questions based on extracted text."""
     prompt = f"Question: {question}\nBased on the following document content:\n{text[:3000]}"  # Limite de 3000 caracteres
     response = chatbot_pipeline(prompt, max_length=500, truncation=True)[0]['generated_text']
     return response.strip()

 import pandas as pd
 import docx
 import json
 from docx import Document
 from transformers import pipeline
 # Configurar Hugging Face API Token
 HF_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
+# Carregar o modelo DeepSeek Coder 1.3B
+chatbot_pipeline = pipeline("text-generation", model="deepseek-ai/deepseek-coder-1.3b", token=HF_API_TOKEN)
 def extract_files_from_folder(folder_path):
     """Scans a folder for PDF, TXT, CSV, DOCX, and IPYNB files."""
             text += para.text + "\n"
     return text
 def combine_text_from_files(extracted_files):
     text = (
         get_text_from_pdf(extracted_files["pdf"]) +
         read_text_from_files(extracted_files["txt"]) +
         get_text_from_csv(extracted_files["csv"]) +
+        get_text_from_docx(extracted_files["docx"])
     )
     return text
 def generate_response(question, text):
+    """Uses the DeepSeek Coder model to answer questions based on extracted text."""
     prompt = f"Question: {question}\nBased on the following document content:\n{text[:3000]}"  # Limite de 3000 caracteres
     response = chatbot_pipeline(prompt, max_length=500, truncation=True)[0]['generated_text']
     return response.strip()