import os

from dotenv import load_dotenv
import gradio as gr
from huggingface_hub import InferenceClient, login
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.schema import Document

load_dotenv()
hftoken = os.environ.get("HFTOKEN")
login(token=hftoken)

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=hftoken)
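# Pipeline overview (summary of the code below): an uploaded PDF/TXT file is
# split into overlapping chunks, embedded with a sentence-transformers model,
# and indexed in FAISS; at chat time the top-matching chunks are retrieved and
# passed to the LLM as context.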
# Global FAISS index, populated the first time a document is uploaded.
vector_store = None

# model_kwargs = {'device': 'cpu'}
# encode_kwargs = {'normalize_embeddings': False}
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name
    # model_kwargs=model_kwargs,
    # encode_kwargs=encode_kwargs
)
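# Note: HuggingFaceEmbeddings downloads the model from the Hugging Face Hub on
# first use and runs it locally; later runs reuse the local cache.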
def extract_text_from_file(file_path):
    """Extract the text from a PDF or TXT file."""
    try:
        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension == ".pdf":
            loader = PyPDFLoader(file_path)
            pages = loader.load()
            docs = [Document(page_content=page.page_content) for page in pages]
        elif file_extension == ".txt":
            with open(file_path, "r", encoding="utf-8") as file:
                text = file.read()
            docs = [Document(page_content=text)]
        else:
            return None, "Unsupported format. Upload a PDF or TXT file."
        return docs, None
    except Exception as e:
        return None, f"Error while reading the file: {e}"
def embed_documents(file):
    """Convert a document into FAISS vectors and generate a summary."""
    global vector_store
    # gr.File(type="filepath") passes a plain path string; older Gradio
    # versions pass a tempfile-like object with a .name attribute.
    file_path = file.name if hasattr(file, "name") else file
    docs, error = extract_text_from_file(file_path)
    if error:
        return error
    # Overlapping 500-character chunks keep each embedding focused while
    # preserving context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    documents = text_splitter.split_documents(docs)
    if documents:
        vector_store = FAISS.from_documents(documents, embeddings)
        full_text = "\n".join([doc.page_content for doc in documents])
        summary = summarize_text(full_text)
        return f"✅ Document indexed successfully!\n\n📌 **File summary**:\n{summary}"
    else:
        return "❌ No text found in the file."
def summarize_text(text):
    """Use the HF model to generate a summary of the document."""
    messages = [
        {"role": "system", "content": "Summarize this text in a few sentences:"},
        {"role": "user", "content": text},
    ]
    response = client.chat_completion(messages, max_tokens=200, temperature=0.5)
    return response.choices[0].message["content"]
def query_faiss(query):
    """Search FAISS for relevant documents and return a reformulated answer."""
    if vector_store is None:
        return "❌ No document indexed. Upload a file."
    # Retrieve the 3 chunks closest to the query in embedding space.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    results = retriever.get_relevant_documents(query)
    if not results:
        return "I could not find any relevant information in the documents."
    context = "\n".join([doc.page_content for doc in results])
    messages = [
        {"role": "system", "content": "Answer the question using the following information without copying it word for word."},
        {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
    ]
    response = client.chat_completion(messages, max_tokens=200, temperature=0.5)
    return response.choices[0].message["content"]
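# respond is a generator: Gradio's ChatInterface streams each yielded string to
# the UI, so partial answers appear while tokens are still being generated.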
def respond(message, history, system_message, max_tokens, temperature, top_p, file=None):
    """Handle the chatbot response using FAISS and Hugging Face."""
    global vector_store
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    # If a file is attached, index it and report instead of answering.
    if file:
        response = embed_documents(file)
        yield response
        return
    context = query_faiss(message)
    if "❌" not in context:
        messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {message}"})
    else:
        messages.append({"role": "user", "content": message})
    response = ""
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = msg.choices[0].delta.content
        if token:  # some stream chunks carry no content (e.g. role/finish events)
            response += token
        yield response
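# Note: file_upload is wired twice below: its .change event indexes the file as
# soon as it is uploaded, and it is also passed to ChatInterface as an
# additional input so respond can re-index if a file is attached at send time.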
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 Chatbot with document integration")
    gr.Image(value="logo-gaia.png", label="Logo")
    with gr.Row():
        with gr.Column():
            gr.Markdown("## ⚙️ Settings")
            with gr.Accordion("Advanced settings", open=False):
                system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
                max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
            file_upload = gr.File(label="📂 Upload a PDF or TXT file", file_types=[".pdf", ".txt"], type="filepath")
            file_output = gr.Textbox()
            file_upload.change(embed_documents, inputs=file_upload, outputs=file_output)
        with gr.Column():
            gr.Markdown("## 💬 Chat")
            chatbot = gr.ChatInterface(
                respond,
                additional_inputs=[system_message, max_tokens, temperature, top_p, file_upload],
            )

if __name__ == "__main__":
    demo.launch(share=True)