Spaces: GIZ / Running on CPU Upgrade

ppsingh committed · Commit c464ae6 · 1 Parent(s): 4b1c32c

UI fixes and sources fixes

app.py CHANGED

@@ -7,6 +7,8 @@ from gradio_client import Client, handle_file
 from utils.whisp_api import handle_geojson_upload
 from utils.retriever import retrieve_paragraphs
 from utils.generator import generate
+import json
+import ast
 
 # Sample questions for examples
 SAMPLE_QUESTIONS = {
@@ -45,6 +47,32 @@ def finish_chat():
     """Finish chat and reset input"""
     return gr.update(interactive=True, value="")
 
+
+def make_html_source(source,i):
+    """
+    takes the text and converts it into html format for display in "source" side tab
+    """
+    meta = source['answer_metadata']
+    content = source['answer'].strip()
+
+    name = meta['filename']
+    card = f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
+            <p>{content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{name}</span>
+            <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+
+    return card
+
 async def chat_response(query, history, method, country, uploaded_file):
     """Generate chat response based on method and inputs"""
 
@@ -89,8 +117,15 @@ async def chat_response(query, history, method, country, uploaded_file):
     else:
         try:
            retrieved_paragraphs = retrieve_paragraphs(query, country)
-            print(country)
-            print(retrieved_paragraphs)
+            context_retrieved = ast.literal_eval(retrieved_paragraphs)
+            context_retrieved_formatted = "||".join(doc['answer'] for doc in context_retrieved)
+            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]
+            # print(country)
+            # print(retrieved_paragraphs)
+            docs_html = []
+            for i, d in enumerate(context_retrieved, 1):
+                docs_html.append(make_html_source(d, i))
+            docs_html = "".join(docs_html)
            response = await generate(query=query, context=retrieved_paragraphs)
 
         except Exception as e:
@@ -105,7 +140,7 @@ async def chat_response(query, history, method, country, uploaded_file):
     for i, char in enumerate(response_with_disclaimer):
         displayed_response += char
         history[-1] = (query, displayed_response)
-        yield history, "**Sources:** Sample source documents would appear here..."
+        yield history, docs_html
         # Only add delay every few characters to avoid being too slow
         if i % 3 == 0:  # Adjust this number to control speed
             await asyncio.sleep(0.02)
@@ -217,7 +252,7 @@ init_prompt = """
 
 """
 
-with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
+with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
 
     # Main Chat Interface
     with gr.Tab("EUDR Bot"):
@@ -230,7 +265,7 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
             show_label=False,
             layout="panel",
             avatar_images=(None, "chatbot_icon_2.png"),
-            height=500
+            height="auto"
         )
 
         # Feedback UI
@@ -319,7 +354,7 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
            sample_groups.append(group_examples)
 
     # Sources Tab
-    with gr.Tab("Fuentes", id=1):
+    with gr.Tab("Fuentes", id=1, elem_id="sources-textbox"):
        sources_textbox = gr.HTML(
            show_label=False,
            value="Los documentos originales aparecerán aquí después de que hagas una pregunta..."
utils/__pycache__/generator.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/generator.cpython-310.pyc and b/utils/__pycache__/generator.cpython-310.pyc differ
 
utils/__pycache__/retriever.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/retriever.cpython-310.pyc and b/utils/__pycache__/retriever.cpython-310.pyc differ
 
utils/__pycache__/whisp_api.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/whisp_api.cpython-310.pyc and b/utils/__pycache__/whisp_api.cpython-310.pyc differ
 
utils/generator.py CHANGED

@@ -113,67 +113,67 @@ chat_model = get_chat_model()
 # ---------------------------------------------------------------------
 # Context processing - may need further refinement (i.e. to manage other data sources)
 # ---------------------------------------------------------------------
-def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """
-    Extract only relevant fields from retrieval results.
+# def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+#     """
+#     Extract only relevant fields from retrieval results.
 
-    Args:
-        retrieval_results: List of JSON objects from retriever
+#     Args:
+#         retrieval_results: List of JSON objects from retriever
 
-    Returns:
-        List of processed objects with only relevant fields
-    """
+#     Returns:
+#         List of processed objects with only relevant fields
+#     """
 
-    retrieval_results = ast.literal_eval(retrieval_results)
+#     retrieval_results = ast.literal_eval(retrieval_results)
 
-    processed_results = []
+#     processed_results = []
 
-    for result in retrieval_results:
-        # Extract the answer content
-        answer = result.get('answer', '')
+#     for result in retrieval_results:
+#         # Extract the answer content
+#         answer = result.get('answer', '')
 
-        # Extract document identification from metadata
-        metadata = result.get('answer_metadata', {})
-        doc_info = {
-            'answer': answer,
-            'filename': metadata.get('filename', 'Unknown'),
-            'page': metadata.get('page', 'Unknown'),
-            'year': metadata.get('year', 'Unknown'),
-            'source': metadata.get('source', 'Unknown'),
-            'document_id': metadata.get('_id', 'Unknown')
-        }
+#         # Extract document identification from metadata
+#         metadata = result.get('answer_metadata', {})
+#         doc_info = {
+#             'answer': answer,
+#             'filename': metadata.get('filename', 'Unknown'),
+#             'page': metadata.get('page', 'Unknown'),
+#             'year': metadata.get('year', 'Unknown'),
+#             'source': metadata.get('source', 'Unknown'),
+#             'document_id': metadata.get('_id', 'Unknown')
+#         }
 
-        processed_results.append(doc_info)
+#         processed_results.append(doc_info)
 
-    return processed_results
+#     return processed_results
 
-def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
-    """
-    Format processed retrieval results into a context string for the LLM.
+# def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
+#     """
+#     Format processed retrieval results into a context string for the LLM.
 
-    Args:
-        processed_results: List of processed objects with relevant fields
+#     Args:
+#         processed_results: List of processed objects with relevant fields
 
-    Returns:
-        Formatted context string
-    """
-    if not processed_results:
-        return ""
+#     Returns:
+#         Formatted context string
+#     """
+#     if not processed_results:
+#         return ""
 
-    context_parts = []
+#     context_parts = []
 
-    for i, result in enumerate(processed_results, 1):
-        doc_reference = f"[Document {i}: {result['filename']}"
-        if result['page'] != 'Unknown':
-            doc_reference += f", Page {result['page']}"
-        if result['year'] != 'Unknown':
-            doc_reference += f", Year {result['year']}"
-        doc_reference += "]"
+#     for i, result in enumerate(processed_results, 1):
+#         doc_reference = f"[Document {i}: {result['filename']}"
+#         if result['page'] != 'Unknown':
+#             doc_reference += f", Page {result['page']}"
+#         if result['year'] != 'Unknown':
+#             doc_reference += f", Year {result['year']}"
+#         doc_reference += "]"
 
-        context_part = f"{doc_reference}\n{result['answer']}\n"
-        context_parts.append(context_part)
+#         context_part = f"{doc_reference}\n{result['answer']}\n"
+#         context_parts.append(context_part)
 
-    return "\n".join(context_parts)
+#     return "\n".join(context_parts)
 
 # ---------------------------------------------------------------------
 # Core generation function for both Gradio UI and MCP
@@ -265,11 +265,11 @@ async def generate(query: str, context: Union[str, List[Dict[str, Any]]]) -> str
             return "Error: No retrieval results provided"
 
         # Process the retrieval results
-        processed_results = extract_relevant_fields(context)
-        formatted_context = format_context_from_results(processed_results)
+        # processed_results = extract_relevant_fields(context)
+        formatted_context = context
 
-        if not formatted_context.strip():
-            return "Error: No valid content found in retrieval results"
+        # if not formatted_context.strip():
+        #     return "Error: No valid content found in retrieval results"
 
     elif isinstance(context, str):
         if not context.strip():
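
With these changes, generate no longer parses or reformats the retrieval results itself: it takes whatever it is given as formatted_context, and the parsing has moved into app.py via ast.literal_eval. Below is a minimal sketch of that parsing step, assuming the retriever returns a Python-literal string of records (which is what the ast.literal_eval call implies); the raw payload here is hypothetical, and json is imported in this commit but not used in the hunks shown.

import ast
import json

# Hypothetical retriever payload: a Python-literal string of records rather than parsed objects.
raw = "[{'answer': 'Text of the retrieved paragraph.', 'answer_metadata': {'filename': 'doc.pdf', 'page': 3}}]"

records = ast.literal_eval(raw)            # what chat_response now does in app.py
answers = [r["answer"] for r in records]   # context_retrieved_lst in the diff
joined = "||".join(answers)                # context_retrieved_formatted in the diff

# If the retriever ever returned strict JSON (double quotes), json.loads would apply instead.
try:
    records = json.loads(raw)
except json.JSONDecodeError:
    pass  # not valid JSON; the Python-literal form above still parses with ast

print(joined)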