Trabis committed
Commit 83bd64d · verified · 1 Parent(s): 89f5e63

Update app.py

Files changed (1)
  1. app.py +1067 -245
app.py CHANGED
@@ -1,3 +1,448 @@
1
  import gradio as gr
2
  from langchain_mistralai.chat_models import ChatMistralAI
3
  from langchain.prompts import ChatPromptTemplate
@@ -19,44 +464,63 @@ from sentence_transformers.cross_encoder import CrossEncoder
19
  import threading
20
  from queue import Queue
21
  import concurrent.futures
22
- from typing import Generator, Tuple, Iterator
23
  import time
24
 
 
25
  class OptimizedRAGLoader:
26
  def __init__(self,
27
  docs_folder: str = "./docs",
28
  splits_folder: str = "./splits",
29
  index_folder: str = "./index"):
30
-
31
  self.docs_folder = Path(docs_folder)
32
  self.splits_folder = Path(splits_folder)
33
  self.index_folder = Path(index_folder)
34
-
35
  # Create folders if they don't exist
36
  for folder in [self.splits_folder, self.index_folder]:
37
  folder.mkdir(parents=True, exist_ok=True)
38
-
39
  # File paths
40
  self.splits_path = self.splits_folder / "splits.json"
41
  self.index_path = self.index_folder / "faiss.index"
42
  self.documents_path = self.index_folder / "documents.pkl"
43
-
44
  # Initialize components
45
  self.index = None
46
  self.indexed_documents = None
47
-
48
  # Initialize encoder model
 
49
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
  self.encoder = SentenceTransformer("intfloat/multilingual-e5-large")
51
  self.encoder.to(self.device)
52
- self.reranker = model = CrossEncoder("cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",trust_remote_code=True)
53
-
 
 
54
  # Initialize thread pool
55
  self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
56
-
57
  # Initialize response cache
58
  self.response_cache = {}
59
-
60
  @lru_cache(maxsize=1000)
61
  def encode(self, text: str):
62
  """Cached encoding function"""
@@ -64,362 +528,720 @@ class OptimizedRAGLoader:
64
  embeddings = self.encoder.encode(
65
  text,
66
  convert_to_numpy=True,
67
- normalize_embeddings=True
 
68
  )
69
  return embeddings
70
-
71
  def batch_encode(self, texts: list):
72
  """Batch encoding for multiple texts"""
73
  with torch.no_grad():
74
  embeddings = self.encoder.encode(
75
  texts,
76
- batch_size=32,
77
  convert_to_numpy=True,
78
  normalize_embeddings=True,
79
- show_progress_bar=False
 
80
  )
81
  return embeddings
82
 
83
- def load_and_split_texts(self):
84
  if self._splits_exist():
85
- return self._load_existing_splits()
86
-
87
  documents = []
88
  futures = []
89
-
90
- for file_path in self.docs_folder.glob("*.txt"):
91
  future = self.executor.submit(self._process_file, file_path)
92
  futures.append(future)
93
-
 
94
  for future in concurrent.futures.as_completed(futures):
95
- documents.extend(future.result())
96
-
97
- self._save_splits(documents)
98
  return documents
99
-
100
- def _process_file(self, file_path):
101
- with open(file_path, 'r', encoding='utf-8') as file:
102
- text = file.read()
103
- chunks = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
104
-
105
- return [
106
- Document(
107
- page_content=chunk,
108
- metadata={
109
- 'source': file_path.name,
110
- 'chunk_id': i,
111
- 'total_chunks': len(chunks)
112
- }
113
- )
114
- for i, chunk in enumerate(chunks)
115
- ]
116
 
117
- def load_index(self) -> bool:
118
- """
119
- Charge l'index FAISS et les documents associés s'ils existent
120
 
121
- Returns:
122
- bool: True si l'index a été chargé, False sinon
123
- """
124
  if not self._index_exists():
125
- print("Aucun index trouvé.")
126
  return False
127
 
128
- print("Chargement de l'index existant...")
129
  try:
130
- # Charger l'index FAISS
131
  self.index = faiss.read_index(str(self.index_path))
132
 
133
- # Charger les documents associés
134
  with open(self.documents_path, 'rb') as f:
135
  self.indexed_documents = pickle.load(f)
136
 
137
- print(f"Index chargé avec {self.index.ntotal} vecteurs")
138
  return True
139
 
140
  except Exception as e:
141
- print(f"Erreur lors du chargement de l'index: {e}")
142
  return False
143
 
144
- def create_index(self, documents=None):
 
145
  if documents is None:
146
  documents = self.load_and_split_texts()
147
-
148
  if not documents:
 
149
  return False
150
-
 
151
  texts = [doc.page_content for doc in documents]
 
152
  embeddings = self.batch_encode(texts)
153
-
154
  dimension = embeddings.shape[1]
155
- self.index = faiss.IndexFlatL2(dimension)
156
-
157
  if torch.cuda.is_available():
158
- # Use GPU for FAISS if available
159
- res = faiss.StandardGpuResources()
160
- self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
161
-
162
- self.index.add(np.array(embeddings).astype('float32'))
163
  self.indexed_documents = documents
164
-
165
- # Save index and documents
166
- cpu_index = faiss.index_gpu_to_cpu(self.index) if torch.cuda.is_available() else self.index
167
- faiss.write_index(cpu_index, str(self.index_path))
168
-
169
- with open(self.documents_path, 'wb') as f:
170
- pickle.dump(documents, f)
171
-
172
- return True
173
 
174
  def _index_exists(self) -> bool:
175
- """Vérifie si l'index et les documents associés existent"""
176
  return self.index_path.exists() and self.documents_path.exists()
177
 
178
- def get_retriever(self, k: int = 10):
179
- if self.index is None:
180
- if not self.load_index():
181
- if not self.create_index():
182
- raise ValueError("Unable to load or create index")
183
 
184
- def retriever_function(query: str) -> list:
185
- # Check cache first
186
- cache_key = f"{query}_{k}"
187
- if cache_key in self.response_cache:
188
- return self.response_cache[cache_key]
189
 
 
190
  query_embedding = self.encode(query)
191
-
192
- distances, indices = self.index.search(
193
- np.array([query_embedding]).astype('float32'),
194
- k
195
- )
196
-
197
- results = [
198
  self.indexed_documents[idx]
199
  for idx in indices[0]
200
- if idx != -1
201
  ]
202
-
203
- # Cache the results
204
- self.response_cache[cache_key] = results
205
- return results
206
-
207
  return retriever_function
208
 
209
- # # Initialize components
210
- # mistral_api_key = os.getenv("mistral_api_key")
211
- # llm = ChatMistralAI(
212
- # model="mistral-large-latest",
213
- # mistral_api_key=mistral_api_key,
214
- # temperature=0.01,
215
- # streaming=True,
216
- # )
217
 
218
  # deepseek_api_key = os.getenv("DEEPSEEK_KEY")
219
- # llm = ChatDeepSeek(
220
- # model="deepseek-chat",
221
- # temperature=0,
222
- # api_key=deepseek_api_key,
223
- # streaming=True,
224
- # )
225
 
226
 
227
  gemini_api_key = os.getenv("GEMINI_KEY")
228
- llm = ChatGoogleGenerativeAI(
229
- model="gemini-1.5-pro",
230
- temperature=0,
231
- google_api_key=gemini_api_key,
232
- disable_streaming=True,
233
- )
234
 
235
 
236
- rag_loader = OptimizedRAGLoader()
237
- retriever = rag_loader.get_retriever(k=5) # Reduced k for faster retrieval
238
 
239
- # Cache for processed questions
240
  question_cache = {}
241
 
 
 
242
  prompt_template = ChatPromptTemplate.from_messages([
243
- ("system", """Vous êtes un assistant juridique expert qualifié. Analysez et répondez aux questions juridiques avec précision.
244
-
245
- PROCESSUS D'ANALYSE :
246
- 1. Analysez le contexte fourni : {context}
247
- 2. Utilisez la recherche web si la reponse n'existe pas dans le contexte
248
- 3. Privilégiez les sources officielles et la jurisprudence récente
249
-
250
- Question à traiter : {question}
251
- """),
252
- ("human", "{question}")
253
- ])
254
-
255
 
 
257
- import gradio as gr
258
-
 
259
 
260
- # Ajouter du CSS pour personnaliser l'apparence
 
261
  css = """
262
  /* Reset RTL global */
263
  *, *::before, *::after {
264
  direction: rtl !important;
265
  text-align: right !important;
266
  }
267
-
268
  body {
269
- font-family: 'Amiri', sans-serif; /* Utilisation de la police Arabe andalouse */
270
- background-color: black; /* Fond blanc */
271
- color: black !important; /* Texte noir */
272
- direction: rtl !important; /* Texte en arabe aligné à droite */
273
  }
274
-
275
  .gradio-container {
276
- direction: rtl !important; /* Alignement RTL pour toute l'interface */
 
277
  }
278
-
279
- /* Éléments de formulaire */
280
- input[type="text"],
281
- .gradio-textbox input,
282
- textarea {
283
- border-radius: 20px;
284
- padding: 10px 15px;
285
- border: 2px solid #000;
286
- font-size: 16px;
287
- width: 80%;
288
- margin: 0 auto;
289
  text-align: right !important;
290
  }
291
 
292
- /* Surcharge des styles de placeholder */
293
- input::placeholder,
294
- textarea::placeholder {
295
  text-align: right !important;
296
  direction: rtl !important;
 
297
  }
298
 
299
- /* Boutons */
300
  .gradio-button {
301
- border-radius: 20px;
302
- font-size: 16px;
303
- background-color: #007BFF;
304
- color: white;
305
- padding: 10px 20px;
306
- margin: 10px auto;
307
- border: none;
308
- width: 80%;
309
- display: block;
310
  }
311
-
312
  .gradio-button:hover {
313
- background-color: #0056b3;
314
  }
315
 
316
  .gradio-chatbot .message {
317
- border-radius: 20px;
318
- padding: 10px;
319
- margin: 10px 0;
320
- background-color: #f1f1f1;
321
- border: 1px solid #ddd;
322
- width: 80%;
323
  text-align: right !important;
324
  direction: rtl !important;
 
 
325
  }
326
 
327
- /* Messages utilisateur alignés à gauche */
328
- .gradio-chatbot .user-message {
329
- margin-right: auto;
330
- background-color: #e3f2fd;
331
- text-align: right !important;
332
- direction: rtl !important;
333
  }
334
 
335
- /* Messages assistant alignés à droite */
336
- .gradio-chatbot .assistant-message {
337
- margin-right: auto;
338
- background-color: #f1f1f1;
339
- text-align: right
 
340
  }
341
 
342
- /* Corrections RTL pour les éléments spécifiques */
343
- .gradio-textbox textarea {
344
- text-align: right !important;
345
  }
346
 
347
- .gradio-dropdown div {
 
348
  text-align: right !important;
349
  }
350
  """
351
 
352
- # Modified process_question function to better work with tuples
353
  def process_question(question: str) -> Iterator[str]:
354
- if question in question_cache:
355
- response, docs = question_cache[question]
356
- sources = [doc.metadata.get("source") for doc in docs]
357
- sources = list(set([os.path.splitext(source)[0] for source in sources]))
358
- yield response + "\n\n\nالمصادر المحتملة :\n" + "\n".join(sources)
 
359
  return
360
-
361
- relevant_docs = retriever(question)
362
-
363
- # Reranking with cross-encoder
364
- context = [doc.page_content for doc in relevant_docs]
365
- text_pairs = [[question, text] for text in context]
366
- scores = rag_loader.reranker.predict(text_pairs)
367
-
368
- scored_docs = list(zip(scores, context, relevant_docs))
369
- scored_docs.sort(key=lambda x: x[0], reverse=True)
370
- reranked_docs = [d[2].page_content for d in scored_docs][:10]
371
-
372
- prompt = prompt_template.format_messages(
373
- context=reranked_docs,
374
- question=question
375
- )
376
-
377
- full_response = ""
378
  try:
379
- for chunk in llm.stream(prompt):
380
- if isinstance(chunk, str):
381
- current_chunk = chunk
382
  else:
383
- current_chunk = chunk.content
384
- full_response += current_chunk
385
-
386
- sources = [d[2].metadata['source'] for d in scored_docs][:10]
387
- sources = list(set([os.path.splitext(source)[0] for source in sources]))
388
-
389
- yield full_response + "\n\n\nالمصادر المحتملة :\n" + "\n".join(sources)
390
-
391
- question_cache[question] = (full_response, relevant_docs)
392
  except Exception as e:
393
- yield f"Erreur lors du traitement : {str(e)}"
394
 
395
- # Updated gradio_stream function to work with tuples
396
- def gradio_stream(question: str, chat_history: list) -> Iterator[list]:
397
  try:
 
398
  for partial_response in process_question(question):
399
- # Using tuples (user_message, bot_message) format
400
- yield chat_history + [(question, partial_response)]
 
401
  except Exception as e:
402
- yield chat_history + [(question, f"Erreur : {str(e)}")]
403
 
404
- # Gradio interface
405
- with gr.Blocks(css=css) as demo:
406
- gr.Markdown("<h2 style='text-align: center !important;'>هذا تطبيق للاجابة على الأسئلة المتعلقة بالقوانين المغربية</h2>")
407
 
408
  with gr.Row():
409
- message = gr.Textbox(label="أدخل سؤالك", placeholder="اكتب سؤالك هنا", elem_id="question_input")
410
-
411
- with gr.Row():
412
- send = gr.Button("بحث", elem_id="search_button")
 
 
413
 
414
  with gr.Row():
415
- # No type parameter - use Gradio's default
416
- chatbot = gr.Chatbot(label="")
417
 
418
- # Simplified user_input function
419
- def user_input(user_message, chat_history):
420
- return "", chat_history + [(user_message, None)]
421
 
422
- send.click(user_input, [message, chatbot], [message, chatbot], queue=False)
423
- send.click(gradio_stream, [message, chatbot], chatbot)
424
 
425
- demo.launch(share=True)
1
+ # import gradio as gr
2
+ # from langchain_mistralai.chat_models import ChatMistralAI
3
+ # from langchain.prompts import ChatPromptTemplate
4
+ # from langchain_deepseek import ChatDeepSeek
5
+ # from langchain_google_genai import ChatGoogleGenerativeAI
6
+ # import os
7
+ # from pathlib import Path
8
+ # import json
9
+ # import faiss
10
+ # import numpy as np
11
+ # from langchain.schema import Document
12
+ # import pickle
13
+ # import re
14
+ # import requests
15
+ # from functools import lru_cache
16
+ # import torch
17
+ # from sentence_transformers import SentenceTransformer
18
+ # from sentence_transformers.cross_encoder import CrossEncoder
19
+ # import threading
20
+ # from queue import Queue
21
+ # import concurrent.futures
22
+ # from typing import Generator, Tuple, Iterator
23
+ # import time
24
+
25
+ # class OptimizedRAGLoader:
26
+ # def __init__(self,
27
+ # docs_folder: str = "./docs",
28
+ # splits_folder: str = "./splits",
29
+ # index_folder: str = "./index"):
30
+
31
+ # self.docs_folder = Path(docs_folder)
32
+ # self.splits_folder = Path(splits_folder)
33
+ # self.index_folder = Path(index_folder)
34
+
35
+ # # Create folders if they don't exist
36
+ # for folder in [self.splits_folder, self.index_folder]:
37
+ # folder.mkdir(parents=True, exist_ok=True)
38
+
39
+ # # File paths
40
+ # self.splits_path = self.splits_folder / "splits.json"
41
+ # self.index_path = self.index_folder / "faiss.index"
42
+ # self.documents_path = self.index_folder / "documents.pkl"
43
+
44
+ # # Initialize components
45
+ # self.index = None
46
+ # self.indexed_documents = None
47
+
48
+ # # Initialize encoder model
49
+ # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
+ # self.encoder = SentenceTransformer("intfloat/multilingual-e5-large")
51
+ # self.encoder.to(self.device)
52
+ # self.reranker = model = CrossEncoder("cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",trust_remote_code=True)
53
+
54
+ # # Initialize thread pool
55
+ # self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
56
+
57
+ # # Initialize response cache
58
+ # self.response_cache = {}
59
+
60
+ # @lru_cache(maxsize=1000)
61
+ # def encode(self, text: str):
62
+ # """Cached encoding function"""
63
+ # with torch.no_grad():
64
+ # embeddings = self.encoder.encode(
65
+ # text,
66
+ # convert_to_numpy=True,
67
+ # normalize_embeddings=True
68
+ # )
69
+ # return embeddings
70
+
71
+ # def batch_encode(self, texts: list):
72
+ # """Batch encoding for multiple texts"""
73
+ # with torch.no_grad():
74
+ # embeddings = self.encoder.encode(
75
+ # texts,
76
+ # batch_size=32,
77
+ # convert_to_numpy=True,
78
+ # normalize_embeddings=True,
79
+ # show_progress_bar=False
80
+ # )
81
+ # return embeddings
82
+
83
+ # def load_and_split_texts(self):
84
+ # if self._splits_exist():
85
+ # return self._load_existing_splits()
86
+
87
+ # documents = []
88
+ # futures = []
89
+
90
+ # for file_path in self.docs_folder.glob("*.txt"):
91
+ # future = self.executor.submit(self._process_file, file_path)
92
+ # futures.append(future)
93
+
94
+ # for future in concurrent.futures.as_completed(futures):
95
+ # documents.extend(future.result())
96
+
97
+ # self._save_splits(documents)
98
+ # return documents
99
+
100
+ # def _process_file(self, file_path):
101
+ # with open(file_path, 'r', encoding='utf-8') as file:
102
+ # text = file.read()
103
+ # chunks = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
104
+
105
+ # return [
106
+ # Document(
107
+ # page_content=chunk,
108
+ # metadata={
109
+ # 'source': file_path.name,
110
+ # 'chunk_id': i,
111
+ # 'total_chunks': len(chunks)
112
+ # }
113
+ # )
114
+ # for i, chunk in enumerate(chunks)
115
+ # ]
116
+
117
+ # def load_index(self) -> bool:
118
+ # """
119
+ # Charge l'index FAISS et les documents associés s'ils existent
120
+
121
+ # Returns:
122
+ # bool: True si l'index a été chargé, False sinon
123
+ # """
124
+ # if not self._index_exists():
125
+ # print("Aucun index trouvé.")
126
+ # return False
127
+
128
+ # print("Chargement de l'index existant...")
129
+ # try:
130
+ # # Charger l'index FAISS
131
+ # self.index = faiss.read_index(str(self.index_path))
132
+
133
+ # # Charger les documents associés
134
+ # with open(self.documents_path, 'rb') as f:
135
+ # self.indexed_documents = pickle.load(f)
136
+
137
+ # print(f"Index chargé avec {self.index.ntotal} vecteurs")
138
+ # return True
139
+
140
+ # except Exception as e:
141
+ # print(f"Erreur lors du chargement de l'index: {e}")
142
+ # return False
143
+
144
+ # def create_index(self, documents=None):
145
+ # if documents is None:
146
+ # documents = self.load_and_split_texts()
147
+
148
+ # if not documents:
149
+ # return False
150
+
151
+ # texts = [doc.page_content for doc in documents]
152
+ # embeddings = self.batch_encode(texts)
153
+
154
+ # dimension = embeddings.shape[1]
155
+ # self.index = faiss.IndexFlatL2(dimension)
156
+
157
+ # if torch.cuda.is_available():
158
+ # # Use GPU for FAISS if available
159
+ # res = faiss.StandardGpuResources()
160
+ # self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
161
+
162
+ # self.index.add(np.array(embeddings).astype('float32'))
163
+ # self.indexed_documents = documents
164
+
165
+ # # Save index and documents
166
+ # cpu_index = faiss.index_gpu_to_cpu(self.index) if torch.cuda.is_available() else self.index
167
+ # faiss.write_index(cpu_index, str(self.index_path))
168
+
169
+ # with open(self.documents_path, 'wb') as f:
170
+ # pickle.dump(documents, f)
171
+
172
+ # return True
173
+
174
+ # def _index_exists(self) -> bool:
175
+ # """Vérifie si l'index et les documents associés existent"""
176
+ # return self.index_path.exists() and self.documents_path.exists()
177
+
178
+ # def get_retriever(self, k: int = 10):
179
+ # if self.index is None:
180
+ # if not self.load_index():
181
+ # if not self.create_index():
182
+ # raise ValueError("Unable to load or create index")
183
+
184
+ # def retriever_function(query: str) -> list:
185
+ # # Check cache first
186
+ # cache_key = f"{query}_{k}"
187
+ # if cache_key in self.response_cache:
188
+ # return self.response_cache[cache_key]
189
+
190
+ # query_embedding = self.encode(query)
191
+
192
+ # distances, indices = self.index.search(
193
+ # np.array([query_embedding]).astype('float32'),
194
+ # k
195
+ # )
196
+
197
+ # results = [
198
+ # self.indexed_documents[idx]
199
+ # for idx in indices[0]
200
+ # if idx != -1
201
+ # ]
202
+
203
+ # # Cache the results
204
+ # self.response_cache[cache_key] = results
205
+ # return results
206
+
207
+ # return retriever_function
208
+
209
+ # # # Initialize components
210
+ # # mistral_api_key = os.getenv("mistral_api_key")
211
+ # # llm = ChatMistralAI(
212
+ # # model="mistral-large-latest",
213
+ # # mistral_api_key=mistral_api_key,
214
+ # # temperature=0.01,
215
+ # # streaming=True,
216
+ # # )
217
+
218
+ # # deepseek_api_key = os.getenv("DEEPSEEK_KEY")
219
+ # # llm = ChatDeepSeek(
220
+ # # model="deepseek-chat",
221
+ # # temperature=0,
222
+ # # api_key=deepseek_api_key,
223
+ # # streaming=True,
224
+ # # )
225
+
226
+
227
+ # gemini_api_key = os.getenv("GEMINI_KEY")
228
+ # llm = ChatGoogleGenerativeAI(
229
+ # model="gemini-1.5-pro",
230
+ # temperature=0,
231
+ # google_api_key=gemini_api_key,
232
+ # disable_streaming=True,
233
+ # )
234
+
235
+
236
+ # rag_loader = OptimizedRAGLoader()
237
+ # retriever = rag_loader.get_retriever(k=5) # Reduced k for faster retrieval
238
+
239
+ # # Cache for processed questions
240
+ # question_cache = {}
241
+
242
+ # prompt_template = ChatPromptTemplate.from_messages([
243
+ # ("system", """Vous êtes un assistant juridique expert qualifié. Analysez et répondez aux questions juridiques avec précision.
244
+
245
+ # PROCESSUS D'ANALYSE :
246
+ # 1. Analysez le contexte fourni : {context}
247
+ # 2. Utilisez la recherche web si la reponse n'existe pas dans le contexte
248
+ # 3. Privilégiez les sources officielles et la jurisprudence récente
249
+
250
+ # Question à traiter : {question}
251
+ # """),
252
+ # ("human", "{question}")
253
+ # ])
254
+
255
+
256
+
257
+ # import gradio as gr
258
+
259
+
260
+ # # Ajouter du CSS pour personnaliser l'apparence
261
+ # css = """
262
+ # /* Reset RTL global */
263
+ # *, *::before, *::after {
264
+ # direction: rtl !important;
265
+ # text-align: right !important;
266
+ # }
267
+
268
+ # body {
269
+ # font-family: 'Amiri', sans-serif; /* Utilisation de la police Arabe andalouse */
270
+ # background-color: black; /* Fond blanc */
271
+ # color: black !important; /* Texte noir */
272
+ # direction: rtl !important; /* Texte en arabe aligné à droite */
273
+ # }
274
+
275
+ # .gradio-container {
276
+ # direction: rtl !important; /* Alignement RTL pour toute l'interface */
277
+ # }
278
+
279
+ # /* Éléments de formulaire */
280
+ # input[type="text"],
281
+ # .gradio-textbox input,
282
+ # textarea {
283
+ # border-radius: 20px;
284
+ # padding: 10px 15px;
285
+ # border: 2px solid #000;
286
+ # font-size: 16px;
287
+ # width: 80%;
288
+ # margin: 0 auto;
289
+ # text-align: right !important;
290
+ # }
291
+
292
+ # /* Surcharge des styles de placeholder */
293
+ # input::placeholder,
294
+ # textarea::placeholder {
295
+ # text-align: right !important;
296
+ # direction: rtl !important;
297
+ # }
298
+
299
+ # /* Boutons */
300
+ # .gradio-button {
301
+ # border-radius: 20px;
302
+ # font-size: 16px;
303
+ # background-color: #007BFF;
304
+ # color: white;
305
+ # padding: 10px 20px;
306
+ # margin: 10px auto;
307
+ # border: none;
308
+ # width: 80%;
309
+ # display: block;
310
+ # }
311
+
312
+ # .gradio-button:hover {
313
+ # background-color: #0056b3;
314
+ # }
315
+
316
+ # .gradio-chatbot .message {
317
+ # border-radius: 20px;
318
+ # padding: 10px;
319
+ # margin: 10px 0;
320
+ # background-color: #f1f1f1;
321
+ # border: 1px solid #ddd;
322
+ # width: 80%;
323
+ # text-align: right !important;
324
+ # direction: rtl !important;
325
+ # }
326
+
327
+ # /* Messages utilisateur alignés à gauche */
328
+ # .gradio-chatbot .user-message {
329
+ # margin-right: auto;
330
+ # background-color: #e3f2fd;
331
+ # text-align: right !important;
332
+ # direction: rtl !important;
333
+ # }
334
+
335
+ # /* Messages assistant alignés à droite */
336
+ # .gradio-chatbot .assistant-message {
337
+ # margin-right: auto;
338
+ # background-color: #f1f1f1;
339
+ # text-align: right
340
+ # }
341
+
342
+ # /* Corrections RTL pour les éléments spécifiques */
343
+ # .gradio-textbox textarea {
344
+ # text-align: right !important;
345
+ # }
346
+
347
+ # .gradio-dropdown div {
348
+ # text-align: right !important;
349
+ # }
350
+ # """
351
+
352
+ # # Modified process_question function to better work with tuples
353
+ # def process_question(question: str) -> Iterator[str]:
354
+ # if question in question_cache:
355
+ # response, docs = question_cache[question]
356
+ # sources = [doc.metadata.get("source") for doc in docs]
357
+ # sources = list(set([os.path.splitext(source)[0] for source in sources]))
358
+ # yield response + "\n\n\nالمصادر المحتملة :\n" + "\n".join(sources)
359
+ # return
360
+
361
+ # relevant_docs = retriever(question)
362
+
363
+ # # Reranking with cross-encoder
364
+ # context = [doc.page_content for doc in relevant_docs]
365
+ # text_pairs = [[question, text] for text in context]
366
+ # scores = rag_loader.reranker.predict(text_pairs)
367
+
368
+ # scored_docs = list(zip(scores, context, relevant_docs))
369
+ # scored_docs.sort(key=lambda x: x[0], reverse=True)
370
+ # reranked_docs = [d[2].page_content for d in scored_docs][:10]
371
+
372
+ # prompt = prompt_template.format_messages(
373
+ # context=reranked_docs,
374
+ # question=question
375
+ # )
376
+
377
+ # full_response = ""
378
+ # try:
379
+ # for chunk in llm.stream(prompt):
380
+ # if isinstance(chunk, str):
381
+ # current_chunk = chunk
382
+ # else:
383
+ # current_chunk = chunk.content
384
+ # full_response += current_chunk
385
+
386
+ # sources = [d[2].metadata['source'] for d in scored_docs][:10]
387
+ # sources = list(set([os.path.splitext(source)[0] for source in sources]))
388
+
389
+ # yield full_response + "\n\n\nالمصادر المحتملة :\n" + "\n".join(sources)
390
+
391
+ # question_cache[question] = (full_response, relevant_docs)
392
+ # except Exception as e:
393
+ # yield f"Erreur lors du traitement : {str(e)}"
394
+
395
+ # # Updated gradio_stream function for 'messages' format
396
+ # def gradio_stream(question: str, chat_history: list) -> Iterator[list]:
397
+ # # chat_history now contains the user message added by user_input
398
+ # # Add a placeholder for the assistant's response
399
+ # chat_history.append({"role": "assistant", "content": ""})
400
+
401
+ # try:
402
+ # # Stream the response using the existing process_question generator
403
+ # for partial_response in process_question(question):
404
+ # # Update the content of the last message (the assistant's placeholder)
405
+ # chat_history[-1]["content"] = partial_response
406
+ # yield chat_history # Yield the entire updated history list
407
+ # except Exception as e:
408
+ # # Update the assistant's message with the error
409
+ # chat_history[-1]["content"] = f"Erreur : {str(e)}"
410
+ # yield chat_history # Yield the history with the error message
411
+
412
+ # # Gradio interface
413
+ # with gr.Blocks(css=css) as demo:
414
+ # gr.Markdown("<h2 style='text-align: center !important;'>هذا تطبيق للاجابة على الأسئلة المتعلقة بالقوانين المغربية</h2>")
415
+
416
+ # with gr.Row():
417
+ # message = gr.Textbox(label="أدخل سؤالك", placeholder="اكتب سؤالك هنا", elem_id="question_input")
418
+
419
+ # with gr.Row():
420
+ # send = gr.Button("بحث", elem_id="search_button")
421
+
422
+ # with gr.Row():
423
+ # # No type parameter - use Gradio's default
424
+ # chatbot = gr.Chatbot(label="", type="messages") # Ajout de type="messages"
425
+
426
+ # # Updated user_input function for 'messages' format
427
+ # def user_input(user_message, chat_history):
428
+ # # chat_history is already a list of message dicts
429
+ # # Append the new user message
430
+ # return "", chat_history + [{"role": "user", "content": user_message}]
431
+
432
+ # send.click(user_input, [message, chatbot], [message, chatbot], queue=False)
433
+ # send.click(gradio_stream, [message, chatbot], chatbot)
434
+
435
+ # demo.launch(share=True)
436
+
437
+
438
+
439
+
440
+
441
+
442
+
443
+
444
+
445
+
446
  import gradio as gr
447
  from langchain_mistralai.chat_models import ChatMistralAI
448
  from langchain.prompts import ChatPromptTemplate
 
464
  import threading
465
  from queue import Queue
466
  import concurrent.futures
467
+ from typing import Generator, Tuple, Iterator, List, Dict
468
  import time
469
 
470
+ # --- (Votre classe OptimizedRAGLoader reste la même) ---
471
  class OptimizedRAGLoader:
472
  def __init__(self,
473
  docs_folder: str = "./docs",
474
  splits_folder: str = "./splits",
475
  index_folder: str = "./index"):
476
+
477
  self.docs_folder = Path(docs_folder)
478
  self.splits_folder = Path(splits_folder)
479
  self.index_folder = Path(index_folder)
480
+
481
  # Create folders if they don't exist
482
  for folder in [self.splits_folder, self.index_folder]:
483
  folder.mkdir(parents=True, exist_ok=True)
484
+
485
  # File paths
486
  self.splits_path = self.splits_folder / "splits.json"
487
  self.index_path = self.index_folder / "faiss.index"
488
  self.documents_path = self.index_folder / "documents.pkl"
489
+
490
  # Initialize components
491
  self.index = None
492
  self.indexed_documents = None
493
+
494
  # Initialize encoder model
495
+ print("Loading Sentence Transformer...")
496
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
497
  self.encoder = SentenceTransformer("intfloat/multilingual-e5-large")
498
  self.encoder.to(self.device)
499
+ print("Loading Cross Encoder...")
500
+ self.reranker = CrossEncoder("cross-encoder/mmarco-mMiniLMv2-L12-H384-v1", trust_remote_code=True)
501
+ print("Models loaded.")
502
+
503
  # Initialize thread pool
504
  self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
505
+
506
  # Initialize response cache
507
  self.response_cache = {}
508
+
509
+ # Try loading index on init
510
+ self.load_or_create_index()
511
+
512
+ def load_or_create_index(self):
513
+ """Loads index if exists, otherwise creates it."""
514
+ if not self.load_index():
515
+ print("Index not found, creating new index...")
516
+ if not self.create_index():
517
+ raise RuntimeError("Failed to create index.")
518
+ else:
519
+ print("Index created successfully.")
520
+ else:
521
+ print("Index loaded successfully.")
522
+
523
+
524
  @lru_cache(maxsize=1000)
525
  def encode(self, text: str):
526
  """Cached encoding function"""
 
528
  embeddings = self.encoder.encode(
529
  text,
530
  convert_to_numpy=True,
531
+ normalize_embeddings=True,
532
+ device=self.device # Ensure encoding runs on the correct device
533
  )
534
  return embeddings
535
+
536
  def batch_encode(self, texts: list):
537
  """Batch encoding for multiple texts"""
538
  with torch.no_grad():
539
  embeddings = self.encoder.encode(
540
  texts,
541
+ batch_size=32, # Adjust based on GPU memory
542
  convert_to_numpy=True,
543
  normalize_embeddings=True,
544
+ show_progress_bar=True, # Show progress for potentially long operations
545
+ device=self.device # Ensure encoding runs on the correct device
546
  )
547
  return embeddings
548
 
549
+ def _splits_exist(self) -> bool:
550
+ """Check if split files exist."""
551
+ return self.splits_path.exists()
552
+
553
+ def _load_existing_splits(self) -> List[Document]:
554
+ """Load splits from JSON file."""
555
+ print(f"Loading existing splits from {self.splits_path}...")
556
+ try:
557
+ with open(self.splits_path, 'r', encoding='utf-8') as f:
558
+ splits_data = json.load(f)
559
+ documents = [
560
+ Document(page_content=item['page_content'], metadata=item['metadata'])
561
+ for item in splits_data
562
+ ]
563
+ print(f"Loaded {len(documents)} splits.")
564
+ return documents
565
+ except Exception as e:
566
+ print(f"Error loading splits: {e}. Recreating...")
567
+ return [] # Return empty list to trigger recreation
568
+
569
+ def _save_splits(self, documents: List[Document]):
570
+ """Save splits to JSON file."""
571
+ print(f"Saving {len(documents)} splits to {self.splits_path}...")
572
+ splits_data = [
573
+ {'page_content': doc.page_content, 'metadata': doc.metadata}
574
+ for doc in documents
575
+ ]
576
+ try:
577
+ with open(self.splits_path, 'w', encoding='utf-8') as f:
578
+ json.dump(splits_data, f, ensure_ascii=False, indent=2)
579
+ print("Splits saved successfully.")
580
+ except Exception as e:
581
+ print(f"Error saving splits: {e}")
582
+
583
+
584
+ def load_and_split_texts(self) -> List[Document]:
585
  if self._splits_exist():
586
+ loaded_splits = self._load_existing_splits()
587
+ if loaded_splits: # Check if loading was successful
588
+ return loaded_splits
589
+
590
+ print("Processing documents and creating splits...")
591
  documents = []
592
  futures = []
593
+
594
+ # Ensure docs folder exists
595
+ if not self.docs_folder.is_dir():
596
+ print(f"Error: Docs folder not found at {self.docs_folder}")
597
+ # Create dummy docs folder for Spaces if it doesn't exist
598
+ self.docs_folder.mkdir(parents=True, exist_ok=True)
599
+ print(f"Created empty docs folder at {self.docs_folder}. Please upload text files.")
600
+ # You might want to add a default dummy file here for testing
601
+ # with open(self.docs_folder / "dummy.txt", "w") as f:
602
+ # f.write("This is a dummy file. Please replace with real legal documents.")
603
+ # return [] # Return empty if no real docs
604
+ # Or let it continue to process the dummy file if created
605
+
606
+ doc_files = list(self.docs_folder.glob("*.txt"))
607
+ if not doc_files:
608
+ print(f"No .txt files found in {self.docs_folder}. Cannot create index.")
609
+ # Add a dummy document if none exist to prevent errors downstream?
610
+ # Or handle this case in create_index more gracefully.
611
+ return []
612
+
613
+ print(f"Found {len(doc_files)} files to process.")
614
+ for file_path in doc_files:
615
  future = self.executor.submit(self._process_file, file_path)
616
  futures.append(future)
617
+
618
+ processed_count = 0
619
  for future in concurrent.futures.as_completed(futures):
620
+ try:
621
+ documents.extend(future.result())
622
+ processed_count += 1
623
+ print(f"Processed file {processed_count}/{len(doc_files)}")
624
+ except Exception as e:
625
+ print(f"Error processing file in future: {e}")
626
+
627
+ if documents:
628
+ self._save_splits(documents)
629
+ else:
630
+ print("No documents were successfully processed or split.")
631
  return documents
 
632
 
633
+ def _process_file(self, file_path: Path) -> List[Document]:
634
+ try:
635
+ with open(file_path, 'r', encoding='utf-8') as file:
636
+ text = file.read()
637
+ # Improved splitting: handle more sentence endings and ensure non-empty chunks
638
+ chunks = [s.strip() for s in re.split(r'(?<=[.!?؟؛])\s+', text) if s and s.strip()]
639
+ if not chunks: # Handle empty files or files with no standard sentence endings
640
+ print(f"Warning: No chunks generated for file {file_path.name}. Treating whole file as one chunk.")
641
+ if text.strip(): # If there's content, use it as one chunk
642
+ chunks = [text.strip()]
643
+ else:
644
+ return [] # Skip empty files
645
+
646
+ return [
647
+ Document(
648
+ page_content=chunk,
649
+ metadata={
650
+ 'source': file_path.name,
651
+ 'chunk_id': i,
652
+ 'total_chunks': len(chunks)
653
+ }
654
+ )
655
+ for i, chunk in enumerate(chunks)
656
+ ]
657
+ except Exception as e:
658
+ print(f"Error processing file {file_path.name}: {e}")
659
+ return [] # Return empty list on error for this file
660
+
661
 
662
+ def load_index(self) -> bool:
663
+ """Loads FAISS index and associated documents if they exist."""
 
664
  if not self._index_exists():
665
+ # print("Index files not found.") # Reduced verbosity
666
  return False
667
 
668
+ print(f"Loading existing index from {self.index_path} and documents from {self.documents_path}...")
669
  try:
670
+ # Load FAISS index
671
  self.index = faiss.read_index(str(self.index_path))
672
 
673
+ # If the loaded index was originally GPU, move it back if possible
674
+ if torch.cuda.is_available():
675
+ try:
676
+ print("Moving loaded index to GPU...")
677
+ res = faiss.StandardGpuResources()
678
+ self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
679
+ print("Index successfully moved to GPU.")
680
+ except Exception as gpu_e:
681
+ print(f"Could not move index to GPU, using CPU. Error: {gpu_e}")
682
+
683
+
684
+ # Load associated documents
685
  with open(self.documents_path, 'rb') as f:
686
  self.indexed_documents = pickle.load(f)
687
 
688
+ if not self.indexed_documents:
689
+ print("Warning: Index loaded, but associated documents file is empty.")
690
+ # Consider this a failure case maybe?
691
+ # return False
692
+ elif self.index.ntotal != len(self.indexed_documents):
693
+ print(f"Warning: Index size ({self.index.ntotal}) does not match document count ({len(self.indexed_documents)}). Index might be corrupted or outdated.")
694
+ # Decide how to handle mismatch: rebuild? error? proceed with caution?
695
+ # For now, let's treat it as loaded but potentially problematic.
696
+
697
+ print(f"Index loaded with {self.index.ntotal} vectors.")
698
  return True
699
 
700
+ except FileNotFoundError:
701
+ print("Index files not found during load attempt.")
702
+ self.index = None
703
+ self.indexed_documents = None
704
+ return False
705
  except Exception as e:
706
+ print(f"Error loading index: {e}")
707
+ # Clean up potentially partially loaded state
708
+ self.index = None
709
+ self.indexed_documents = None
710
  return False
711
 
712
+ def create_index(self, documents: List[Document] = None) -> bool:
713
+ """Creates or recreates the FAISS index."""
714
  if documents is None:
715
  documents = self.load_and_split_texts()
716
+
717
  if not documents:
718
+ print("No documents provided or loaded, cannot create index.")
719
  return False
720
+
721
+ print(f"Creating index for {len(documents)} document splits...")
722
  texts = [doc.page_content for doc in documents]
723
+ print("Encoding documents...")
724
  embeddings = self.batch_encode(texts)
725
+
726
+ if embeddings is None or len(embeddings) == 0:
727
+ print("Encoding failed or produced no embeddings.")
728
+ return False
729
+
730
  dimension = embeddings.shape[1]
731
+ print(f"Embeddings created with dimension {dimension}.")
732
+
733
+ # Create CPU index first
734
+ cpu_index = faiss.IndexFlatL2(dimension)
735
+ print("FAISS CPU index created.")
736
+
737
+ # Use GPU for FAISS if available
738
  if torch.cuda.is_available():
739
+ try:
740
+ print("Attempting to use GPU for FAISS indexing...")
741
+ res = faiss.StandardGpuResources()
742
+ self.index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
743
+ print("Adding embeddings to GPU index...")
744
+ self.index.add(np.array(embeddings).astype('float32'))
745
+ print(f"Embeddings added to GPU index. Index size: {self.index.ntotal}")
746
+ # Save the CPU version of the index
747
+ print(f"Saving CPU version of index to {self.index_path}...")
748
+ faiss.write_index(faiss.index_gpu_to_cpu(self.index), str(self.index_path))
749
+ except Exception as gpu_e:
750
+ print(f"GPU indexing failed: {gpu_e}. Falling back to CPU.")
751
+ self.index = cpu_index # Fallback to CPU index
752
+ print("Adding embeddings to CPU index...")
753
+ self.index.add(np.array(embeddings).astype('float32'))
754
+ print(f"Embeddings added to CPU index. Index size: {self.index.ntotal}")
755
+ print(f"Saving CPU index to {self.index_path}...")
756
+ faiss.write_index(self.index, str(self.index_path))
757
+ else:
758
+ print("GPU not available. Using CPU for FAISS indexing.")
759
+ self.index = cpu_index
760
+ print("Adding embeddings to CPU index...")
761
+ self.index.add(np.array(embeddings).astype('float32'))
762
+ print(f"Embeddings added to CPU index. Index size: {self.index.ntotal}")
763
+ print(f"Saving CPU index to {self.index_path}...")
764
+ faiss.write_index(self.index, str(self.index_path))
765
+
766
+
767
  self.indexed_documents = documents
768
+
769
+ # Save documents
770
+ print(f"Saving associated documents to {self.documents_path}...")
771
+ try:
772
+ with open(self.documents_path, 'wb') as f:
773
+ pickle.dump(documents, f)
774
+ print("Index and documents saved successfully.")
775
+ return True
776
+ except Exception as e:
777
+ print(f"Error saving associated documents: {e}")
778
+ # Should we delete the index file if doc saving fails?
779
+ # self.index_path.unlink(missing_ok=True)
780
+ return False
781
+
782
 
783
  def _index_exists(self) -> bool:
784
+ """Checks if the index and associated document files exist."""
785
  return self.index_path.exists() and self.documents_path.exists()
786
 
787
+ def get_retriever(self, k: int = 10, rerank_k: int = 5):
788
+ """Gets a retriever function that performs FAISS search and cross-encoder reranking."""
789
+ if self.index is None or self.indexed_documents is None:
790
+ print("Index not initialized. Ensure load_or_create_index() was successful.")
791
+ # Attempt to load/create again, or raise error
792
+ self.load_or_create_index()
793
+ if self.index is None or self.indexed_documents is None:
794
+ raise ValueError("Unable to load or create index for retriever.")
795
+
796
+ # Make sure k for FAISS search is >= rerank_k
797
+ faiss_k = max(k, rerank_k)
798
+
799
 
800
+ def retriever_function(query: str) -> list[Document]:
801
+ # Check cache first (optional, consider if caching reranked results is desired)
802
+ # cache_key = f"{query}_{rerank_k}"
803
+ # if cache_key in self.response_cache:
804
+ # return self.response_cache[cache_key]
805
 
806
+ print(f"\nRetriever: Searching for query: '{query[:50]}...'")
807
  query_embedding = self.encode(query)
808
+
809
+ print(f"Searching top {faiss_k} in FAISS index...")
810
+ try:
811
+ distances, indices = self.index.search(
812
+ np.array([query_embedding]).astype('float32'),
813
+ faiss_k
814
+ )
815
+ except Exception as search_e:
816
+ print(f"Error during FAISS search: {search_e}")
817
+ return []
818
+
819
+ # Filter out invalid indices (-1) and get initial documents
820
+ initial_results = [
821
  self.indexed_documents[idx]
822
  for idx in indices[0]
823
+ if idx != -1 and idx < len(self.indexed_documents) # Added bounds check
824
  ]
825
+
826
+ if not initial_results:
827
+ print("No relevant documents found in FAISS search.")
828
+ return []
829
+
830
+ print(f"Found {len(initial_results)} initial candidates. Reranking top {len(initial_results)}...")
831
+
832
+ # Prepare for reranking
833
+ context = [doc.page_content for doc in initial_results]
834
+ text_pairs = [[query, text] for text in context]
835
+
836
+ # Rerank using the cross-encoder
837
+ try:
838
+ scores = self.reranker.predict(text_pairs, show_progress_bar=False) # Don't show progress bar here
839
+ except Exception as rerank_e:
840
+ print(f"Error during reranking: {rerank_e}. Returning initial FAISS results.")
841
+ return initial_results[:rerank_k] # Return top k from initial results
842
+
843
+
844
+ # Combine scores with documents and sort
845
+ scored_docs = list(zip(scores, initial_results))
846
+ scored_docs.sort(key=lambda x: x[0], reverse=True)
847
+
848
+ # Select the top rerank_k documents
849
+ reranked_docs = [doc for score, doc in scored_docs[:rerank_k]]
850
+
851
+ print(f"Reranked results (top {len(reranked_docs)}):")
852
+ # for i, (score, doc) in enumerate(scored_docs[:rerank_k]):
853
+ # print(f" {i+1}. Score: {score:.4f}, Source: {doc.metadata.get('source', 'N/A')}, Chunk: {doc.metadata.get('chunk_id', 'N/A')}")
854
+
855
+
856
+ # Cache results (optional)
857
+ # self.response_cache[cache_key] = reranked_docs
858
+ return reranked_docs
859
+
860
  return retriever_function
861
 
862
+ # --- LLM Initialization ---
863
+ # Choose *one* LLM to uncomment based on your available API key
864
+ # print("Initializing LLM...")
865
+ # mistral_api_key = os.getenv("MISTRAL_API_KEY") # Ensure env var name matches your Space secret
866
+ # if mistral_api_key:
867
+ # llm = ChatMistralAI(
868
+ # model="mistral-large-latest",
869
+ # mistral_api_key=mistral_api_key,
870
+ # temperature=0.01,
871
+ # # streaming=True, # Streaming is handled differently with Gradio 'messages'
872
+ # )
873
+ # print("Using Mistral LLM.")
874
+ # else:
875
+ # print("Mistral API key not found.")
876
 
877
  # deepseek_api_key = os.getenv("DEEPSEEK_KEY")
878
+ # if deepseek_api_key:
879
+ # llm = ChatDeepSeek(
880
+ # model="deepseek-chat",
881
+ # temperature=0.01, # Slightly non-zero for potentially better phrasing
882
+ # api_key=deepseek_api_key,
883
+ # # streaming=True, # Streaming is handled differently with Gradio 'messages'
884
+ # )
885
+ # print("Using DeepSeek LLM.")
886
+ # else:
887
+ # print("Deepseek API key not found.")
888
 
889
 
890
  gemini_api_key = os.getenv("GEMINI_KEY")
891
+ if gemini_api_key:
892
+ try:
893
+ llm = ChatGoogleGenerativeAI(
894
+ model="gemini-1.5-flash", # Using flash for potentially faster responses
895
+ temperature=0.1, # Slightly increased temperature
896
+ google_api_key=gemini_api_key,
897
+ # convert_system_message_to_human=True # Sometimes needed for Gemini
898
+ # streaming=False # Gemini streaming requires specific handling; simpler without for now
899
+ )
900
+ print("Using Google Gemini LLM.")
901
+ except Exception as gemini_e:
902
+ print(f"Error initializing Gemini LLM: {gemini_e}")
903
+ llm = None # Set llm to None if initialization fails
904
+ else:
905
+ print("Gemini API key not found.")
906
+ llm = None
907
 
908
+ # --- RAG Loader and Retriever Initialization ---
909
+ print("Initializing RAG Loader...")
910
+ try:
911
+ rag_loader = OptimizedRAGLoader()
912
+ retriever = rag_loader.get_retriever(k=15, rerank_k=5) # Retrieve more initially, rerank top 5
913
+ print("RAG Loader and Retriever initialized.")
914
+ except Exception as rag_e:
915
+ print(f"FATAL: Could not initialize RAG system: {rag_e}")
916
+ # Optionally exit or provide a dummy retriever/LLM
917
+ retriever = lambda query: [] # Dummy retriever
918
+ llm = None # Ensure LLM is None if RAG fails
919
 
 
 
920
 
921
+ # Cache for processed questions (Consider persistence or size limits if needed)
922
  question_cache = {}
923
 
924
+ # --- Prompt Template ---
925
+ # Adjusted prompt for clarity and conciseness
926
  prompt_template = ChatPromptTemplate.from_messages([
927
+ ("system", """أنت مساعد قانوني خبير ومؤهل في القانون المغربي. مهمتك هي تحليل الأسئلة القانونية والإجابة عليها بدقة بناءً على السياق المقدم.
928
 
929
+ إرشادات:
930
+ 1. حلل السياق التالي بعناية:
931
+ {context}
932
+ 2. استخدم المعلومات من السياق فقط لصياغة إجابتك.
933
+ 3. إذا كانت المعلومات غير كافية أو غير موجودة في السياق للإجابة على السؤال، أشر بوضوح إلى أن السياق المقدم لا يحتوي على الإجابة المطلوبة. لا تختلق معلومات.
934
+ 4. اذكر المصادر (أسماء الملفات) التي استخدمتها من السياق في نهاية إجابتك.
935
+ 5. أجب باللغة العربية وبأسلوب واضح وموجز.
936
 
937
+ السؤال المطلوب الإجابة عليه: {question}"""),
938
+ ("human", "{question}") # Human message might be redundant if question is in system prompt, but often helps guide model role.
939
+ ])
940
 
941
+ # --- CSS Styling ---
942
+ # (CSS remains the same, ensure RTL works as intended)
943
  css = """
944
  /* Reset RTL global */
945
+ :root {
946
+ --input-border-radius: 15px !important; /* Example variable */
947
+ --button-border-radius: 15px !important;
948
+ }
949
+
950
  *, *::before, *::after {
951
  direction: rtl !important;
952
  text-align: right !important;
953
  }
 
954
  body {
955
+ font-family: 'Arial', 'sans-serif'; /* Using a more standard font */
956
+ background-color: #f8f9fa; /* Light gray background */
957
+ color: #343a40; /* Darker text */
958
+ direction: rtl !important;
959
  }
 
960
  .gradio-container {
961
+ direction: rtl !important;
962
+ background-color: #f8f9fa;
963
  }
964
+ /* Input Textbox */
965
+ .gradio-textbox textarea {
966
+ border-radius: var(--input-border-radius) !important;
967
+ padding: 12px 18px !important;
968
+ border: 1px solid #ced4da !important;
969
+ font-size: 16px !important;
970
+ width: 95% !important; /* Adjust width */
971
+ margin: 10px auto !important;
972
+ display: block !important;
 
 
973
  text-align: right !important;
974
+ background-color: #ffffff !important; /* White background for input */
975
+ color: #495057 !important; /* Input text color */
976
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05) !important; /* Subtle shadow */
977
  }
978
 
979
+ /* Placeholder styling */
980
+ .gradio-textbox textarea::placeholder {
 
981
  text-align: right !important;
982
  direction: rtl !important;
983
+ color: #6c757d !important; /* Lighter placeholder text */
984
  }
985
 
986
+ /* Send Button */
987
  .gradio-button {
988
+ border-radius: var(--button-border-radius) !important;
989
+ font-size: 16px !important;
990
+ font-weight: bold !important;
991
+ background-color: #007bff !important; /* Primary blue */
992
+ color: white !important;
993
+ padding: 12px 24px !important;
994
+ margin: 5px auto 15px auto !important; /* Adjust margins */
995
+ border: none !important;
996
+ width: 95% !important; /* Match textbox width */
997
+ display: block !important;
998
+ cursor: pointer !important;
999
+ transition: background-color 0.2s ease-in-out !important;
1000
+ box-shadow: 0 2px 5px rgba(0, 123, 255, 0.3) !important; /* Button shadow */
1001
  }
 
1002
  .gradio-button:hover {
1003
+ background-color: #0056b3 !important; /* Darker blue on hover */
1004
+ }
1005
+
1006
+ /* Chatbot Messages */
1007
+ .gradio-chatbot {
1008
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1) !important; /* Chatbot container shadow */
1009
+ border-radius: 10px !important;
1010
+ background-color: #ffffff !important; /* White background for chat area */
1011
+ padding: 10px !important;
1012
+ }
1013
+
1014
+ .gradio-chatbot .message-wrap { /* Targeting the wrapper for better styling */
1015
+ padding: 5px 0 !important; /* Space between messages */
1016
  }
1017
 
1018
  .gradio-chatbot .message {
1019
+ border-radius: 18px !important; /* Rounded corners for messages */
1020
+ padding: 10px 15px !important;
1021
+ margin: 5px 0 !important; /* Vertical margin */
1022
+ max-width: 85% !important; /* Max width of message bubble */
1023
+ border: none !important; /* Remove default border */
1024
+ box-shadow: 0 1px 3px rgba(0,0,0,0.08) !important; /* Subtle message shadow */
1025
  text-align: right !important;
1026
  direction: rtl !important;
1027
+ word-wrap: break-word; /* Ensure long words break */
1028
+ line-height: 1.5; /* Improve readability */
1029
  }
1030
 
1031
+ /* User Messages (align left conceptually, but RTL makes them appear right-aligned within container) */
1032
+ .gradio-chatbot .user-message .message {
1033
+ margin-left: auto !important; /* Push to the 'end' side in LTR, start in RTL */
1034
+ margin-right: 0 !important;
1035
+ background-color: #e7f5ff !important; /* Light blue for user */
1036
+ color: #0056b3 !important;
1037
  }
1038
 
1039
+ /* Assistant Messages (align right conceptually, appear left-aligned in RTL) */
1040
+ .gradio-chatbot .assistant-message .message {
1041
+ margin-right: auto !important; /* Push to the 'start' side in LTR, end in RTL */
1042
+ margin-left: 0 !important;
1043
+ background-color: #f1f3f5 !important; /* Light gray for assistant */
1044
+ color: #343a40 !important;
1045
  }
1046
 
1047
+ /* Markdown Header */
1048
+ h2 {
1049
+ color: #0056b3 !important; /* Match button hover color */
1050
+ font-weight: bold !important;
1051
+ margin-bottom: 20px !important;
1052
+ text-align: center !important;
1053
+ direction: rtl !important; /* Ensure header is also RTL */
1054
  }
1055
 
1056
+ /* Ensure specific Gradio elements inherit RTL */
1057
+ .gradio-dropdown div, .gradio-checkboxgroup div, .gradio-radio div {
1058
  text-align: right !important;
1059
+ direction: rtl !important;
1060
+ }
1061
+
1062
+ /* Center alignment for elements within rows if needed */
1063
+ .gradio-row {
1064
+ justify-content: center !important; /* Helps center content like buttons/textboxes */
1065
  }
1066
  """
1067
 
1068
+ # --- Backend Processing Function ---
1069
  def process_question(question: str) -> Iterator[str]:
1070
+ """
1071
+ Processes a question using RAG and LLM, yielding the response stream.
1072
+ Includes source attribution.
1073
+ """
1074
+ if not llm:
1075
+ yield "عذراً، النموذج اللغوي غير متاح حالياً. يرجى المحاولة لاحقاً."
1076
  return
1077
+ if not retriever:
1078
+ yield "عذراً، نظام استرجاع المعلومات غير متاح حالياً."
1079
+ return
1080
+
1081
+ # Simple caching check (consider more robust caching)
1082
+ # if question in question_cache:
1083
+ # response, sources_str = question_cache[question]
1084
+ # yield response + sources_str
1085
+ # return
1086
+
1087
+ print(f"Processing question: {question}")
1088
  try:
1089
+ relevant_docs = retriever(question)
1090
+ except Exception as ret_e:
1091
+ print(f"Error during retrieval: {ret_e}")
1092
+ yield f"حدث خطأ أثناء البحث عن المستندات المتعلقة: {str(ret_e)}"
1093
+ return
1094
+
1095
+
1096
+ if not relevant_docs:
1097
+ print("No relevant documents found by retriever.")
1098
+ yield "لم أتمكن من العثور على معلومات ذات صلة في المستندات المتاحة للإجابة على سؤالك."
1099
+ return
1100
+
1101
+ context_str = "\n\n".join([f"المصدر: {doc.metadata.get('source', 'غير معروف')}\nالمحتوى: {doc.page_content}" for doc in relevant_docs])
1102
+ sources = list(set([doc.metadata.get("source", "غير معروف") for doc in relevant_docs]))
1103
+ sources_str = "\n\n\nالمصادر المحتملة التي تم الرجوع إليها:\n- " + "\n- ".join(sources)
1104
+
1105
+ print(f"Context created from {len(relevant_docs)} documents. Generating response...")
1106
+ # print(f"Context sample: {context_str[:200]}...") # Debugging
1107
+
1108
+ try:
1109
+ # Format the prompt using the template
1110
+ prompt = prompt_template.format_messages(
1111
+ context=context_str,
1112
+ question=question
1113
+ )
1114
+
1115
+ # --- Non-Streaming Call (Simpler for Gemini without specific streaming setup) ---
1116
+ # full_response = llm.invoke(prompt) # Use invoke for non-streaming
1117
+ # if isinstance(full_response, str): # Handle potential different return types
1118
+ # response_content = full_response
1119
+ # else:
1120
+ # response_content = full_response.content
1121
+
1122
+ # yield response_content + sources_str # Yield the complete response at once
1123
+
1124
+ # --- Streaming Call (If LLM supports it and is configured correctly) ---
1125
+ full_response = ""
1126
+ stream = llm.stream(prompt)
1127
+ start_time = time.time()
1128
+ first_chunk_received = False
1129
+
1130
+ for chunk in stream:
1131
+ if not first_chunk_received:
1132
+ end_time = time.time()
1133
+ print(f"Time to first chunk: {end_time - start_time:.2f} seconds")
1134
+ first_chunk_received = True
1135
+
1136
+ # Adapt based on Langchain version and LLM provider's chunk structure
1137
+ if hasattr(chunk, 'content'):
1138
+ current_chunk = chunk.content
1139
+ elif isinstance(chunk, str):
1140
+ current_chunk = chunk
1141
  else:
1142
+ print(f"Unexpected chunk type: {type(chunk)}")
1143
+ current_chunk = str(chunk) # Fallback
1144
+
1145
+ if current_chunk: # Avoid adding empty chunks
1146
+ full_response += current_chunk
1147
+ # Yield intermediate response with sources appended
1148
+ yield full_response + sources_str # Appending sources at each step
1149
+
1150
+ if not first_chunk_received: # Handle cases where stream might be empty or fail silently
1151
+ print("No chunks received from LLM stream.")
1152
+ yield "حدث خطأ أو لم يتمكن النموذج من إنشاء رد." + sources_str
1153
+
1154
+ print("LLM response generation complete.")
1155
+ # Cache the final result (optional)
1156
+ # question_cache[question] = (full_response, sources_str)
1157
+
1158
  except Exception as e:
1159
+ print(f"Error during LLM generation: {e}")
1160
+ yield f"حدث خطأ أثناء توليد الإجابة: {str(e)}" + sources_str # Include sources even on error if possible
1161
+
1162
+
1163
+ # --- Gradio Interface Functions ---
1164
+
1165
+ # Function to add user message to history (using 'messages' format)
1166
+ def user_input(user_message: str, chat_history: List[Dict[str, str]]) -> Tuple[str, List[Dict[str, str]]]:
1167
+ if not user_message.strip(): # Prevent empty messages
1168
+ return "", chat_history
1169
+ # Append the user's message to the history in the correct format
1170
+ return "", chat_history + [{"role": "user", "content": user_message}]
1171
+
1172
+ # Function to handle the streaming response (using 'messages' format)
1173
+ def gradio_stream(question: str, chat_history: List[Dict[str, str]]) -> Iterator[List[Dict[str, str]]]:
1174
+ if not question.strip(): # Prevent processing empty questions passed from user_input
1175
+ yield chat_history
1176
+ return
1177
+
1178
+ if not llm or not retriever:
1179
+ chat_history.append({"role": "assistant", "content": "عذراً، النظام غير جاهز حالياً. يرجى التأكد من تهيئة المفاتيح والنماذج."})
1180
+ yield chat_history
1181
+ return
1182
+
1183
+ # Add a placeholder for the assistant's response
1184
+ chat_history.append({"role": "assistant", "content": ""})
1185
+ # Use a thinking indicator initially
1186
+ chat_history[-1]["content"] = "جارٍ التفكير والبحث..."
1187
+ yield chat_history # Show "Thinking..." message immediately
1188
+
1189
 
 
 
1190
  try:
1191
+ # Stream the response using the process_question generator
1192
  for partial_response in process_question(question):
1193
+ # Update the content of the last message (the assistant's placeholder)
1194
+ chat_history[-1]["content"] = partial_response
1195
+ yield chat_history # Yield the entire updated history list
1196
  except Exception as e:
1197
+ print(f"Error in gradio_stream calling process_question: {e}")
1198
+ # Update the assistant's message with the error
1199
+ chat_history[-1]["content"] = f"حدث خطأ غير متوقع: {str(e)}"
1200
+ yield chat_history # Yield the history with the error message
1201
 
1202
+
1203
+ # --- Gradio Interface Definition ---
1204
+ print("Building Gradio interface...")
1205
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: # Added a theme
1206
+ gr.Markdown("<h2 style='text-align: center;'>مساعد قانوني مغربي - أسئلة وأجوبة</h2>")
1207
+ gr.Markdown("<p style='text-align: center; color: #6c757d;'>اطرح سؤالك حول القوانين المغربية وسأحاول الإجابة بناءً على المستندات المتوفرة.</p>")
1208
+
1209
+ # Use type="messages" for the chatbot
1210
+ chatbot = gr.Chatbot(label="المحادثة", type="messages", height=500)
1211
 
1212
  with gr.Row():
1213
+ message = gr.Textbox(
1214
+ label="أدخل سؤالك هنا:",
1215
+ placeholder="مثال: ما هي شروط الحصول على تعويض عن حادثة شغل؟",
1216
+ lines=3, # Allow more lines for input
1217
+ elem_id="question_input" # Keep elem_id if needed elsewhere
1218
+ )
1219
 
1220
  with gr.Row():
1221
+ send = gr.Button("إرسال السؤال", variant="primary") # Use variant for emphasis
1222
+
1223
+
1224
+ # Chain the actions:
1225
+ # 1. When send is clicked, call user_input:
1226
+ # - Takes user message (from 'message' textbox) and current history (from 'chatbot')
1227
+ # - Outputs: Clears the 'message' textbox, updates 'chatbot' history with user message
1228
+ # 2. After user_input completes, call gradio_stream:
1229
+ # - Takes the *original* user message (important!) and the *updated* history from step 1.
1230
+ # - Outputs: Streams updates back to the 'chatbot' component.
1231
+ send.click(user_input, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
1232
+ then(gradio_stream, inputs=[message, chatbot], outputs=chatbot)
1233
+
1234
+ # Optional: Allow submitting with Enter key
1235
+ message.submit(user_input, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
1236
+ then(gradio_stream, inputs=[message, chatbot], outputs=chatbot)
1237
 
1238
 
1239
+ print("Gradio Blocks defined.")
 
1240
 
1241
+ # --- Launch the Application ---
1242
+ if __name__ == "__main__":
1243
+ print("Launching Gradio app...")
1244
+ # Set share=False when running locally or in environments like Spaces where it's handled differently
1245
+ # Set debug=True for more detailed logs during development
1246
+ demo.queue() # Enable queue for handling multiple users/requests
1247
+ demo.launch(share=False, debug=True) # share=True can cause issues in some environments