Update app.py
app.py CHANGED
@@ -16,6 +16,8 @@ from sentence_transformers import SentenceTransformer
 import threading
 from queue import Queue
 import concurrent.futures
+from typing import Generator, Tuple
+import time
 
 class OptimizedRAGLoader:
     def __init__(self,
@@ -230,13 +232,42 @@ prompt_template = ChatPromptTemplate.from_messages([
 ])
 
 
-def process_question(question: str):
+# def process_question(question: str):
+#     """
+#     Process the question and yield the answer progressively.
+#     """
+#     # Check cache first
+#     if question in question_cache:
+#         yield question_cache[question]  # Return directly from the cache if available
+
+#     relevant_docs = retriever(question)
+#     context = "\n".join([doc.page_content for doc in relevant_docs])
+
+#     prompt = prompt_template.format_messages(
+#         context=context,
+#         question=question
+#     )
+
+#     response = ""  # Initialize the response
+#     # Here we assume that 'llm.stream' is a generator that yields chunks
+#     for chunk in llm.stream(prompt):  # assumes llm.stream yields response chunks
+#         if isinstance(chunk, str):
+#             response += chunk  # Accumulate the response if it is already a string
+#         else:
+#             response += chunk.content  # Otherwise take the chunk's content (if chunk is a specific object type)
+
+#         yield response, context  # Yield the updated response and the context
+
+#     # Cache the result at the end
+#     # question_cache[question] = (response, context)
+
+def process_question(question: str) -> Generator[Tuple[str, str], None, None]:
     """
     Process the question and yield the answer progressively.
     """
     # Check cache first
     if question in question_cache:
-        yield question_cache[question]
+        yield question_cache[question]
 
     relevant_docs = retriever(question)
     context = "\n".join([doc.page_content for doc in relevant_docs])
@@ -246,18 +277,13 @@ def process_question(question: str):
         question=question
     )
 
-    response = ""  # Initialize the response
-    # Here we assume that 'llm.stream' is a generator that yields chunks
-    for chunk in llm.stream(prompt):  # assumes llm.stream yields response chunks
-        if isinstance(chunk, str):
-            response += chunk  # Accumulate the response if it is already a string
-        else:
-            response += chunk.content  # Otherwise take the chunk's content (if chunk is a specific object type)
-
-        yield response, context  # Yield the updated response and the context
-
-    # Cache the result at the end
-    # question_cache[question] = (response, context)
+    current_response = ""
+    for chunk in llm.stream(prompt):
+        if isinstance(chunk, str):
+            current_response += chunk
+        else:
+            current_response += chunk.content
+        yield current_response, context
 
 # Custom CSS with Google Fonts import
 custom_css = """
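For reference, a minimal consumption sketch (not part of the commit): it assumes process_question, retriever, llm, and question_cache are defined in app.py as shown above, and the question string is purely illustrative. Each yield carries the full answer accumulated so far together with the retrieved context, so a caller only has to re-render the latest value to get a streaming effect.

# Hedged usage sketch; the question text is a made-up example, everything else
# (process_question, retriever, llm, question_cache) is assumed to come from app.py above.
question = "Example question about the indexed documents"

final_answer, final_context = "", ""
for partial_answer, context in process_question(question):
    # Each iteration delivers the answer accumulated so far plus the context,
    # so a UI can simply replace its displayed text with partial_answer.
    final_answer, final_context = partial_answer, context

print(final_answer)
print("--- context used ---")
print(final_context)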
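Note that in this revision the cache write is still commented out, and the cached branch yields the stored tuple but then falls through and recomputes the answer. One possible way to restore caching without changing the streaming behaviour is sketched below; process_question_cached is a hypothetical wrapper name, not something defined in app.py, and question_cache is assumed to remain a plain dict. It reuses the Generator and Tuple imports added at the top of the file.

# Hedged sketch only: 'process_question_cached' is a hypothetical wrapper, not part of app.py.
def process_question_cached(question: str) -> Generator[Tuple[str, str], None, None]:
    if question in question_cache:
        yield question_cache[question]
        return  # stop here so the cached answer is not recomputed

    final_answer, final_context = "", ""
    for final_answer, final_context in process_question(question):
        yield final_answer, final_context

    # store the fully accumulated answer once streaming has finished
    question_cache[question] = (final_answer, final_context)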