import gradio as gr
from langchain_mistralai.chat_models import ChatMistralAI
from langchain.prompts import ChatPromptTemplate
import os
from pathlib import Path
import json
import faiss
import numpy as np
from langchain.schema import Document
import pickle
import re
import torch
from sentence_transformers import SentenceTransformer
import concurrent.futures
from typing import Iterator
class OptimizedRAGLoader:
    def __init__(self,
                 docs_folder: str = "./docs",
                 splits_folder: str = "./splits",
                 index_folder: str = "./index"):
        self.docs_folder = Path(docs_folder)
        self.splits_folder = Path(splits_folder)
        self.index_folder = Path(index_folder)

        # Create folders if they don't exist
        for folder in [self.splits_folder, self.index_folder]:
            folder.mkdir(parents=True, exist_ok=True)

        # File paths
        self.splits_path = self.splits_folder / "splits.json"
        self.index_path = self.index_folder / "faiss.index"
        self.documents_path = self.index_folder / "documents.pkl"

        # Initialize components
        self.index = None
        self.indexed_documents = None

        # Initialize encoder model
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.encoder = SentenceTransformer("intfloat/multilingual-e5-large")
        self.encoder.to(self.device)

        # Initialize thread pool
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)

        # Initialize response cache
        self.response_cache = {}
    def encode(self, text: str):
        """Encode a single query into a normalized embedding."""
        with torch.no_grad():
            embeddings = self.encoder.encode(
                text,
                convert_to_numpy=True,
                normalize_embeddings=True
            )
        return embeddings

    def batch_encode(self, texts: list):
        """Batch encoding for multiple texts."""
        with torch.no_grad():
            embeddings = self.encoder.encode(
                texts,
                batch_size=32,
                convert_to_numpy=True,
                normalize_embeddings=True,
                show_progress_bar=False
            )
        return embeddings
    def load_and_split_texts(self):
        if self._splits_exist():
            return self._load_existing_splits()

        documents = []
        futures = []
        for file_path in self.docs_folder.glob("*.txt"):
            future = self.executor.submit(self._process_file, file_path)
            futures.append(future)

        for future in concurrent.futures.as_completed(futures):
            documents.extend(future.result())

        self._save_splits(documents)
        return documents

    def _process_file(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
        chunks = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
        return [
            Document(
                page_content=chunk,
                metadata={
                    'source': file_path.name,
                    'chunk_id': i,
                    'total_chunks': len(chunks)
                }
            )
            for i, chunk in enumerate(chunks)
        ]
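
    # The three helpers below are referenced by load_and_split_texts() but are
    # missing from this file. This is a minimal sketch, assuming the splits are
    # persisted as a JSON list of {page_content, metadata} records at
    # self.splits_path; adjust if the original project stored them differently.
    def _splits_exist(self) -> bool:
        """Check whether previously saved splits are available on disk."""
        return self.splits_path.exists()

    def _load_existing_splits(self):
        """Reload previously saved splits as Document objects."""
        with open(self.splits_path, 'r', encoding='utf-8') as f:
            records = json.load(f)
        return [
            Document(page_content=r['page_content'], metadata=r['metadata'])
            for r in records
        ]

    def _save_splits(self, documents):
        """Persist the split documents to disk so they can be reused."""
        records = [
            {'page_content': doc.page_content, 'metadata': doc.metadata}
            for doc in documents
        ]
        with open(self.splits_path, 'w', encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=2)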
    def load_index(self) -> bool:
        """
        Load the FAISS index and the associated documents if they exist.

        Returns:
            bool: True if the index was loaded, False otherwise.
        """
        if not self._index_exists():
            print("No index found.")
            return False

        print("Loading existing index...")
        try:
            # Load the FAISS index
            self.index = faiss.read_index(str(self.index_path))

            # Load the associated documents
            with open(self.documents_path, 'rb') as f:
                self.indexed_documents = pickle.load(f)

            print(f"Index loaded with {self.index.ntotal} vectors")
            return True
        except Exception as e:
            print(f"Error while loading the index: {e}")
            return False
    def create_index(self, documents=None):
        """Build a FAISS index over the document embeddings and persist it."""
        if documents is None:
            documents = self.load_and_split_texts()
        if not documents:
            return False

        texts = [doc.page_content for doc in documents]
        embeddings = self.batch_encode(texts)

        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dimension)
        if torch.cuda.is_available():
            # Use GPU for FAISS if available
            res = faiss.StandardGpuResources()
            self.index = faiss.index_cpu_to_gpu(res, 0, self.index)

        self.index.add(np.array(embeddings).astype('float32'))
        self.indexed_documents = documents

        # Save index and documents (FAISS serializes a CPU index)
        cpu_index = faiss.index_gpu_to_cpu(self.index) if torch.cuda.is_available() else self.index
        faiss.write_index(cpu_index, str(self.index_path))
        with open(self.documents_path, 'wb') as f:
            pickle.dump(documents, f)
        return True

    def _index_exists(self) -> bool:
        """Check whether the index and the associated documents exist on disk."""
        return self.index_path.exists() and self.documents_path.exists()
    def get_retriever(self, k: int = 10):
        if self.index is None:
            if not self.load_index():
                if not self.create_index():
                    raise ValueError("Unable to load or create index")

        def retriever_function(query: str) -> list:
            # Check cache first
            cache_key = f"{query}_{k}"
            if cache_key in self.response_cache:
                return self.response_cache[cache_key]

            query_embedding = self.encode(query)
            distances, indices = self.index.search(
                np.array([query_embedding]).astype('float32'),
                k
            )

            results = [
                self.indexed_documents[idx]
                for idx in indices[0]
                if idx != -1
            ]

            # Cache the results
            self.response_cache[cache_key] = results
            return results

        return retriever_function
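
# Example (assumption, not part of the original app): the index can be built
# ahead of time so the first user request does not pay the embedding cost.
#
#     loader = OptimizedRAGLoader()
#     if not loader.load_index():
#         loader.create_index()
#     search = loader.get_retriever(k=5)
#     print(search("نص تجريبي")[0].page_content)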
# Initialize components
mistral_api_key = os.getenv("mistral_api_key")

llm = ChatMistralAI(
    model="mistral-large-latest",
    mistral_api_key=mistral_api_key,
    temperature=0.1,
    streaming=True,
)

rag_loader = OptimizedRAGLoader()
retriever = rag_loader.get_retriever(k=10)  # Number of chunks retrieved per question

# Cache for processed questions
question_cache = {}
prompt_template = ChatPromptTemplate.from_messages([
    ("system", """أنت مساعد مفيد يجيب على الأسئلة باللغة العربية باستخدام المعلومات المقدمة.
استخدم المعلومات التالية للإجابة على السؤال:

{context}

إذا لم تكن المعلومات كافية للإجابة على السؤال بشكل كامل، قم بتوضيح ذلك.
أجب بشكل موجز ودقيق.
أذكر رقم المادة المصدر.
أذكر اسم ورقم القانون.
"""),
    ("human", "{question}")
])
# CSS to customize the appearance
css = """
/* Global RTL reset */
*, *::before, *::after {
    direction: rtl !important;
    text-align: right !important;
}

body {
    font-family: 'Amiri', sans-serif; /* Arabic typeface */
    background-color: white;          /* White background */
    color: black;                     /* Black text */
    direction: rtl !important;        /* Arabic text aligned to the right */
}

.gradio-container {
    direction: rtl !important;        /* RTL alignment for the whole interface */
}

/* Form elements */
input[type="text"],
.gradio-textbox input,
textarea {
    border-radius: 20px;
    padding: 10px 15px;
    border: 2px solid #000;
    font-size: 16px;
    width: 80%;
    margin: 0 auto;
    text-align: right !important;
}

/* Placeholder style overrides */
input::placeholder,
textarea::placeholder {
    text-align: right !important;
    direction: rtl !important;
}

/* Buttons */
.gradio-button {
    border-radius: 20px;
    font-size: 16px;
    background-color: #007BFF;
    color: white;
    padding: 10px 20px;
    margin: 10px auto;
    border: none;
    width: 80%;
    display: block;
}

.gradio-button:hover {
    background-color: #0056b3;
}

.gradio-chatbot .message {
    border-radius: 20px;
    padding: 10px;
    margin: 10px 0;
    background-color: #f1f1f1;
    border: 1px solid #ddd;
    width: 80%;
    text-align: right !important;
    direction: rtl !important;
}

/* User messages */
.gradio-chatbot .user-message {
    margin-right: auto;
    background-color: #e3f2fd;
    text-align: right !important;
    direction: rtl !important;
}

/* Assistant messages */
.gradio-chatbot .assistant-message {
    margin-right: auto;
    background-color: #f1f1f1;
    text-align: right;
}

/* RTL fixes for specific elements */
.gradio-textbox textarea {
    text-align: right !important;
}

.gradio-dropdown div {
    text-align: right !important;
}
"""
def process_question(question: str) -> Iterator[str]:
    """
    Process the question and return a response generator for streaming.
    """
    if question in question_cache:
        yield question_cache[question][0]
        return

    relevant_docs = retriever(question)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    prompt = prompt_template.format_messages(
        context=context,
        question=question
    )

    full_response = ""
    try:
        for chunk in llm.stream(prompt):
            if isinstance(chunk, str):
                current_chunk = chunk
            else:
                current_chunk = chunk.content
            full_response += current_chunk
            yield full_response  # Send the updated response while streaming

        # Cache the final answer together with the context used to produce it
        question_cache[question] = (full_response, context)
    except Exception as e:
        yield f"Error while processing the question: {str(e)}"
def gradio_stream(question: str, chat_history: list) -> Iterator[list]:
    """
    Format the output for the Gradio Chatbot component with streaming.
    """
    try:
        for partial_response in process_question(question):
            # Append the latest assistant response to the chat history
            updated_chat = chat_history + [[question, partial_response]]
            yield updated_chat
    except Exception as e:
        # Handle errors during streaming
        updated_chat = chat_history + [[question, f"Error: {str(e)}"]]
        yield updated_chat
# Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("<h2 style='text-align: center !important;'>هذا تطبيق للإجابة على الأسئلة المتعلقة بالقوانين المغربية</h2>")

    # Layout in three rows
    with gr.Row():  # First row: the question
        message = gr.Textbox(label="أدخل سؤالك", placeholder="اكتب سؤالك هنا", elem_id="question_input")

    with gr.Row():  # Second row: the search button
        send = gr.Button("بحث", elem_id="search_button")

    with gr.Row():  # Third row: the answer display
        chatbot = gr.Chatbot(label="")

    # Echo the user's question into the history and clear the input box
    def user_input(user_message, chat_history):
        return "", chat_history + [[user_message, None]]

    send.click(user_input, [message, chatbot], [message, chatbot], queue=False)
    send.click(gradio_stream, [message, chatbot], chatbot)

demo.launch(share=True)
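
# Note (assumption about the deployed Gradio version): generator outputs only
# stream incrementally when the queue is enabled, which some Gradio 3.x
# versions require explicitly, e.g.:
#
#     demo.queue().launch(share=True)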