import gradio as gr
import time
import pandas as pd
import asyncio
from uuid import uuid4
from gradio_client import Client, handle_file
from utils.whisp_api import handle_geojson_upload
from utils.retriever import retrieve_paragraphs
from utils.generator import generate
import json
import ast
from utils.logger import ChatLogger
from pathlib import Path
from huggingface_hub import CommitScheduler, HfApi
import os

# Fetch tokens from the Spaces secrets
SPACES_LOG = os.environ.get("EUDR_SPACES_LOG")
if not SPACES_LOG:
    raise ValueError("EUDR_SPACES_LOG not found in environment")

# Create the local logs directory
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

# The local log files are committed to the dataset repo periodically.
# See https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
scheduler = CommitScheduler(
    repo_id="GIZ/spaces_logs",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="eudr_chatbot",
    token=SPACES_LOG,
)

# Initialize the logger with the shared scheduler
# scheduler.start()  # Start the scheduler
chat_logger = ChatLogger(scheduler=scheduler)

# Sample questions for the examples tab
SAMPLE_QUESTIONS = {
    "Análisis de la deforestación": [
        "¿Cuáles son los principales puntos críticos de deforestación en Ecuador?",
        "Muéstrame las tendencias de deforestación en el área cargada.",
        "¿Qué productos básicos están impulsando la deforestación en Guatemala?",
    ],
    "Cumplimiento de la EUDR": [
        "¿Cuáles son los requisitos clave del EUDR para las importaciones de café?",
        "¿Cómo puedo demostrar que he actuado con la debida diligencia en mi cadena de suministro?",
        "¿Qué documentación se necesita para cumplir con la EUDR?",
    ],
    "Evaluación de riesgos": [
        "¿Cuál es el nivel de riesgo de deforestación en esta región?",
        "¿Cómo evalúo los riesgos de la cadena de suministro?",
        "¿Cuáles son los plazos de cumplimiento?",
    ],
}

BEGINNING_TEXT = "**Respuesta generada mediante inteligencia artificial:** \n\n"

# Spanish disclaimer text appended to every generated answer
DISCLAIMER_TEXT = "\n\n---\n ⚠️ **Descargo de responsabilidad:** El chatbot EUDR puede cometer errores. Verifique la información importante con fuentes oficiales. \n"

# Global cache of WHISP API results (keyed per uploaded file) to prevent double calls
geojson_analysis_cache = {}


# Initialize chat
def start_chat(query, history):
    """Start a new chat interaction: lock the input, switch to the sources tab, append the query."""
    history = history + [(query, None)]
    return gr.update(interactive=False), gr.update(selected=1), history


def finish_chat():
    """Finish the chat turn and reset the input textbox."""
    return gr.update(interactive=True, value="")
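
# The dictionaries handled by make_html_source below come from utils/retriever.py.
# Their exact structure is defined there; judging only from the fields accessed in
# this file, each retrieved source is assumed to look roughly like (values here are
# placeholders for illustration):
#
#     {
#         "answer": "Retrieved paragraph text ...",
#         "answer_metadata": {"filename": "eudr_report.pdf", "page": 12},
#     }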

def make_html_source(source, i):
    """Convert a retrieved paragraph into an HTML card shown in the "Referencia" side tab."""
    meta = source['answer_metadata']
    content = source['answer'].strip()
    name = meta['filename']
    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
            <p>{content}</p>
        </div>
    </div>
    """
    return card


# `request` is type-annotated so that Gradio injects the request object for logging.
async def chat_response(query, history, method, country, uploaded_file, request: gr.Request = None):
    """Generate a chat response based on the selected method and inputs."""
    # Skip processing if this is an auto-generated file-analysis message
    # (auto_analyze_file adds those to the history directly).
    if query.startswith("📄 Análisis del GeoJSON") or query.startswith("📄 GeoJSON cargado"):
        return

    # Content for the sources panel; stays empty for the GeoJSON branch
    docs_html = ""

    # Validate inputs
    if method == "Subir GeoJson":
        if uploaded_file is None:
            warning_message = "⚠️ **No se ha cargado ningún GeoJSON.** Por favor, carga primero un GeoJSON."
            history[-1] = (query, warning_message)
            yield history, ""
            return

    # Handle GeoJSON upload → use cached results
    if method == "Subir GeoJson" and uploaded_file:
        try:
            # Check whether we already have cached results for this file
            file_key = f"{uploaded_file.name}_{uploaded_file.size if hasattr(uploaded_file, 'size') else 'unknown'}"
            if file_key in geojson_analysis_cache:
                # Use cached results
                response = geojson_analysis_cache[file_key]
            else:
                # Call the API and cache the results
                stats_result = handle_geojson_upload(uploaded_file)
                formatted_stats = stats_result[0]
                geojson_analysis_cache[file_key] = formatted_stats
                response = formatted_stats
        except Exception as e:
            response = f"Error processing file: {str(e)}"

    # Handle "Talk to Reports"
    else:
        try:
            retrieved_paragraphs = retrieve_paragraphs(query, country)
            context_retrieved = ast.literal_eval(retrieved_paragraphs)
            context_retrieved_formatted = "||".join(doc['answer'] for doc in context_retrieved)
            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]
            # print(country)
            # print(retrieved_paragraphs)

            docs_html = []
            for i, d in enumerate(context_retrieved, 1):
                docs_html.append(make_html_source(d, i))
            docs_html = "".join(docs_html)

            response = await generate(query=query, context=retrieved_paragraphs)

            # Log the interaction
            chat_logger.log(
                query=query,
                answer=response,
                retrieved_content=context_retrieved_lst,
                request=request,
            )
        except Exception as e:
            response = f"Error retrieving information: {str(e)}"

    # Add the disclaimer and stream the response character by character
    response_with_disclaimer = BEGINNING_TEXT + response + DISCLAIMER_TEXT
    displayed_response = ""
    for i, char in enumerate(response_with_disclaimer):
        displayed_response += char
        history[-1] = (query, displayed_response)
        yield history, docs_html
        # Only add a delay every few characters to avoid being too slow
        if i % 3 == 0:  # Adjust this number to control the streaming speed
            await asyncio.sleep(0.02)


def auto_analyze_file(file, history):
    """Automatically analyze an uploaded GeoJSON file and add the results to the chat."""
    if file is not None:
        try:
            # Call the API immediately and cache the results
            file_key = f"{file.name}_{file.size if hasattr(file, 'size') else 'unknown'}"
            if file_key not in geojson_analysis_cache:
                stats_result = handle_geojson_upload(file)
                formatted_stats = stats_result[0]
                geojson_analysis_cache[file_key] = formatted_stats

            # Add the analysis results directly to the chat (no intermediate message)
            analysis_query = "📄 Análisis del GeoJSON cargado"
            cached_result = geojson_analysis_cache[file_key] + DISCLAIMER_TEXT

            # Add both query and response to the history
            history = history + [(analysis_query, cached_result)]
            return history, "**Sources:** WhispAPI Analysis Results"

        except Exception as e:
            error_msg = f"❌ Error processing GeoJSON file: {str(e)}"
            history = history + [("📄 Error en análisis GeoJSON", error_msg)]
            return history, ""

    return history, ""


def toggle_search_method(method):
    """Toggle between the GeoJSON upload and country selection sections."""
    if method == "Subir GeoJson":
        return (
            gr.update(visible=True),   # geojson_section
            gr.update(visible=False),  # reports_section
            gr.update(value=None),     # dropdown_country
        )
    else:  # "Talk to Reports"
        return (
            gr.update(visible=False),  # geojson_section
            gr.update(visible=True),   # reports_section
            gr.update(),               # dropdown_country
        )


def change_sample_questions(key):
    """Show only the example group that matches the selected category."""
    keys = list(SAMPLE_QUESTIONS.keys())
    index = keys.index(key)
    visible_bools = [False] * len(keys)
    visible_bools[index] = True
    return [gr.update(visible=visible_bools[i]) for i in range(len(keys))]


# Set up the Gradio theme
theme = gr.themes.Base(
    primary_hue="green",
    secondary_hue="blue",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
    text_size=gr.themes.utils.sizes.text_sm,
)

# Custom CSS for DataFrame styling
# (note: currently not passed to gr.Blocks, which loads style.css instead)
custom_css = """
/* DataFrame text sizing - modify these values to change the text size */
.dataframe table {
    font-size: 12px !important;  /* Change this value (e.g., 10px, 14px, 16px) */
}
.dataframe th {
    font-size: 13px !important;  /* Header text size */
    font-weight: 600 !important;
}
.dataframe td {
    font-size: 12px !important;  /* Cell text size */
    padding: 8px !important;     /* Cell padding */
}

/* Alternative size classes - use elem_classes="dataframe-small" on the DataFrame component */
.dataframe-small table { font-size: 10px !important; }
.dataframe-small th { font-size: 11px !important; }
.dataframe-small td { font-size: 10px !important; }

.dataframe-medium table { font-size: 14px !important; }
.dataframe-medium th { font-size: 15px !important; }
.dataframe-medium td { font-size: 14px !important; }

.dataframe-large table { font-size: 16px !important; }
.dataframe-large th { font-size: 17px !important; }
.dataframe-large td { font-size: 16px !important; }
"""

init_prompt = """
Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.

💡 **Cómo utilizarlo (panel a la derecha)**
- **Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.
- **Ejemplos:** seleccione entre preguntas de ejemplo de diferentes categorías.
- **Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.

⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsabilidad».

⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios.
"""

with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:

    # Main chat interface
    with gr.Tab("EUDR Bot"):
        with gr.Row():
            # Left column - chat interface (2/3 width)
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    value=[(None, init_prompt)],
                    show_copy_button=True,
                    show_label=False,
                    layout="panel",
                    avatar_images=(None, "chatbot_icon_2.png"),
                    height="auto",
                )

                # Feedback UI
                with gr.Column():
                    with gr.Row(visible=False) as feedback_row:
                        gr.Markdown("¿Te ha sido útil esta respuesta?")
                        with gr.Row():
                            okay_btn = gr.Button("👍 De acuerdo", size="sm")
                            not_okay_btn = gr.Button("👎 No según lo esperado", size="sm")
                    feedback_thanks = gr.Markdown("Gracias por los comentarios.", visible=False)

                # Input textbox
                with gr.Row():
                    textbox = gr.Textbox(
                        placeholder="¡Pregúntame cualquier cosa sobre el cumplimiento de la normativa EUDR o sube tu GeoJSON para su análisis!",
                        show_label=False,
                        scale=7,
                        lines=1,
                        interactive=True,
                    )

            # Right column - controls and tabs (1/3 width)
            with gr.Column(scale=1, variant="panel"):
                with gr.Tabs() as tabs:

                    # Data sources tab
                    with gr.Tab("Modo de uso", id=2):
                        search_method = gr.Radio(
                            choices=["Hablar con documentos", "Subir GeoJson"],
                            label="Elija una fuente de datos",
                            info="Sube un GeoJSON para su análisis o selecciona informes EUDR específicos de cada país.",
                            value="Hablar con documentos",
                        )

                        # GeoJSON upload section
                        with gr.Group(visible=False) as geojson_section:
                            uploaded_file = gr.File(
                                label="Subir GeoJson",
                                file_types=[".geojson", ".json"],
                                file_count="single",
                            )
                            upload_status = gr.Markdown("", visible=False)

                            # Results table for the WHISP API response
                            results_table = gr.DataFrame(
                                label="Resultados del análisis",
                                visible=False,
                                interactive=False,
                                wrap=True,
                                elem_classes="dataframe",
                            )

                        # Talk to Reports section
                        with gr.Group(visible=True) as reports_section:
                            dropdown_country = gr.Dropdown(
                                ["Ecuador", "Guatemala"],
                                label="Selecciona país",
                                value=None,
                                interactive=True,
                            )

                    # Examples tab
                    with gr.Tab("Ejemplos", id=0):
                        examples_hidden = gr.Textbox(visible=False)
                        first_key = list(SAMPLE_QUESTIONS.keys())[0]
                        dropdown_samples = gr.Dropdown(
                            SAMPLE_QUESTIONS.keys(),
                            value=first_key,
                            interactive=True,
                            show_label=True,
                            label="Seleccione un ejemplo de pregunta.",
                        )

                        # Create one example group per question category
                        sample_groups = []
                        for i, (key, questions) in enumerate(SAMPLE_QUESTIONS.items()):
                            examples_visible = True if i == 0 else False
                            with gr.Row(visible=examples_visible) as group_examples:
                                gr.Examples(
                                    questions,
                                    [examples_hidden],
                                    examples_per_page=8,
                                    run_on_click=False,
                                )
                            sample_groups.append(group_examples)

                    # Sources tab
                    with gr.Tab("Referencia", id=1, elem_id="sources-textbox"):
                        sources_textbox = gr.HTML(
                            show_label=False,
                            value="Los documentos originales aparecerán aquí después de que haga una pregunta...",
                        )

    # Guidelines tab
    with gr.Tab("Directrices"):
        gr.Markdown("""
#### Welcome to EUDR Q&A!

This AI-powered assistant helps you understand EU Deforestation Regulation compliance and analyze geographic data.

## 💬 How to Ask Effective Questions

| ❌ Less Effective | ✅ More Effective |
|------------------|-------------------|
| "What is deforestation?" | "What are the main deforestation hotspots in Ecuador?" |
| "Tell me about compliance" | "What EUDR requirements apply to coffee imports from Guatemala?" |
| "Show me data" | "What is the deforestation rate in the uploaded region?" |

## 🔍 Using Data Sources

**Upload GeoJSON:** Upload your geographic data files for automatic analysis via the WHISP API

**Talk to Reports:** Select Ecuador or Guatemala for country-specific EUDR analysis

## ⭐ Best Practices

- Be specific about regions, commodities, or time periods
- Ask one question at a time for clearer answers
- Use follow-up questions to explore topics in more depth
- Provide context when possible
""")

    # About tab
    with gr.Tab("Información"):
        gr.Markdown("""
## About EUDR Q&A

The **EU Deforestation Regulation (EUDR)** requires companies to ensure that specific commodities placed on the EU market are deforestation-free and legally produced.

This AI-powered tool helps stakeholders:
- Understand EUDR compliance requirements
- Analyze geographic deforestation data using the WHISP API
- Assess supply chain risks
- Navigate complex regulatory landscapes

**Developed by GIZ** to enhance accessibility and understanding of EUDR requirements through advanced AI and geographic data processing capabilities.

### Key Features:
- Automatic analysis of uploaded GeoJSON files via the WHISP API
- Country-specific EUDR compliance guidance
- Real-time question answering with source citations
- User-friendly interface for complex regulatory information
""")

    # Disclaimer tab
    with gr.Tab("Exención de responsabilidad"):
        gr.Markdown("""
## Important Disclaimers

⚠️ **Scope & Limitations:**
- This tool is designed for EUDR compliance assistance and geographic data analysis
- Responses should not be considered official legal or compliance advice
- Always consult qualified professionals for official compliance decisions

⚠️ **Data & Privacy:**
- Uploaded GeoJSON files are processed via the external WHISP API for analysis
- We collect usage statistics to improve the tool
- Files are processed temporarily and not permanently stored

⚠️ **AI Limitations:**
- Responses are AI-generated and may contain inaccuracies
- The tool is a prototype under continuous development
- Always verify important information with authoritative sources

**Data Collection:** We collect questions, answers, feedback, and anonymized usage statistics to improve tool performance, based on our legitimate interest in service enhancement. By using this chatbot, you agree to these terms and acknowledge that you are solely responsible for any reliance on or actions taken based on its responses.

**Technical Information:** More technical details about the tool are available in its [**Readme**](https://huggingface.co/spaces/GIZ/Asistente_EUDR/blob/main/README.md).

**This is a prototype that is still being tested and improved, so it is not perfect and may sometimes give irrelevant answers.** If you are not satisfied with an answer, please ask a more specific question or report your feedback to help us improve the system.
""")

    # Event handlers

    # Toggle between the GeoJSON-upload and report-based search methods
    search_method.change(
        fn=toggle_search_method,
        inputs=[search_method],
        outputs=[geojson_section, reports_section, dropdown_country],
    )

    # File upload - automatically analyze the file and display the results in the chat
    uploaded_file.change(
        fn=auto_analyze_file,
        inputs=[uploaded_file, chatbot],
        outputs=[chatbot, sources_textbox],
        queue=False,
    )

    # Sample-question category selector: show the example group for the chosen category
    # (wires the dropdown from the "Ejemplos" tab to change_sample_questions)
    dropdown_samples.change(
        change_sample_questions,
        inputs=[dropdown_samples],
        outputs=sample_groups,
    )

    # Chat functionality
    textbox.submit(
        start_chat,
        [textbox, chatbot],
        [textbox, tabs, chatbot],
        queue=False,
    ).then(
        chat_response,
        [textbox, chatbot, search_method, dropdown_country, uploaded_file],
        [chatbot, sources_textbox],
    ).then(
        lambda: gr.update(visible=True),
        outputs=[feedback_row],
    ).then(
        finish_chat,
        outputs=[textbox],
    )

    # Examples functionality
    examples_hidden.change(
        start_chat,
        [examples_hidden, chatbot],
        [textbox, tabs, chatbot],
        queue=False,
    ).then(
        chat_response,
        [examples_hidden, chatbot, search_method, dropdown_country, uploaded_file],
        [chatbot, sources_textbox],
    ).then(
        lambda: gr.update(visible=True),
        outputs=[feedback_row],
    ).then(
        finish_chat,
        outputs=[textbox],
    )

    # Feedback logging
    def log_feedback(feedback, chatbot):
        # Log the last interaction from the chatbot history
        if chatbot and len(chatbot) > 0:
            last_query, last_response = chatbot[-1]
            chat_logger.log(
                query=last_query,
                answer=last_response,
                retrieved_content=[],  # The original retrieved content is not available here
                feedback=feedback,
            )
        return (gr.update(visible=False), gr.update(visible=True))

    # Feedback buttons
    okay_btn.click(
        lambda chatbot: log_feedback("positive", chatbot),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks],
    )
    not_okay_btn.click(
        lambda chatbot: log_feedback("negative", chatbot),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()