Asistente_EUDR

Running on CPU Upgrade

File size: 23,022 Bytes

c83f30f
054da8d
4cb1652
a1c7621
054da8d
4cb1652
b1d15aa
56217e4
cc3c7b8
c464ae6
 
95c8547
 
 
 
c0ec585
95c8547
 
 
 
 
 
 
 
5a4f54c
95c8547
 
 
 
 
 
 
 
 
 
 
 
 
 
c71c0cf
3eaba6e
26991e0
 
 
 
3eaba6e
26991e0
 
 
 
3eaba6e
26991e0
 
 
 
3eaba6e
 
1aa747d
37c2e4c
4a66b7a
8b608dc
 
 
c593cde
 
0b1d4d1
c593cde
3eaba6e
c71c0cf
3eaba6e
 
054da8d
3eaba6e
c71c0cf
3eaba6e
054da8d
c464ae6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95c8547
c71c0cf
 
c593cde
 
 
 
1b3f70d
bf13ae6
054da8d
bf13ae6
054da8d
3eaba6e
054da8d
c0ec585
c71c0cf
c593cde
bf13ae6
a1c7621
c593cde
 
 
 
 
 
 
 
 
 
 
 
 
a1c7621
 
0b1d4d1
1b3f70d
0b1d4d1
 
78c9684
c464ae6
 
 
 
 
 
 
 
 
08a0540
 
95c8547
 
 
 
 
 
 
 
0b1d4d1
 
8b608dc
 
37c2e4c
59ed861
8b608dc
59ed861
 
8b608dc
59ed861
 
c464ae6
59ed861
 
 
 
0b1d4d1
3eaba6e
bebc7ce
 
 
 
c593cde
 
 
 
 
 
 
bebc7ce
c593cde
 
8b608dc
c593cde
 
 
8b608dc
c593cde
bebc7ce
 
c593cde
bebc7ce
 
 
 
3eaba6e
c71c0cf
59ed861
c71c0cf
 
 
 
 
 
 
 
 
 
 
3eaba6e
 
c71c0cf
3eaba6e
 
 
 
 
054da8d
c71c0cf
054da8d
3eaba6e
 
054da8d
3eaba6e
054da8d
 
c71c0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3eaba6e
0300b79
c71c0cf
4a66b7a
c71c0cf
4a66b7a
 
 
bf13ae6
4a66b7a
fd583b7
bf13ae6
 
3eaba6e
c464ae6
c71c0cf
 
bf13ae6
3eaba6e
c71c0cf
054da8d
 
3eaba6e
 
 
 
01090e9
c464ae6
054da8d
c71c0cf
 
 
 
bf13ae6
c71c0cf
bf13ae6
 
 
c71c0cf
 
 
 
bf13ae6
c71c0cf
 
 
 
 
 
 
3eaba6e
054da8d
c71c0cf
 
4a66b7a
054da8d
d1c9732
4a66b7a
2d08a77
d1c9732
054da8d
c71c0cf
 
d1c9732
c71c0cf
bf13ae6
c71c0cf
 
 
3eaba6e
c71c0cf
 
 
2d08a77
c71c0cf
 
 
 
 
 
 
d1c9732
c71c0cf
 
78c9684
c71c0cf
 
 
 
 
26991e0
3eaba6e
c71c0cf
3eaba6e
c71c0cf
 
 
 
 
4a66b7a
c71c0cf
 
 
3eaba6e
 
054da8d
3eaba6e
c71c0cf
 
 
 
 
 
3eaba6e
c71c0cf
 
4a66b7a
c71c0cf
 
4a66b7a
c71c0cf
 
 
26991e0
c71c0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26991e0
c71c0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26991e0
c71c0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd583b7
c71c0cf
fd583b7
 
 
c71c0cf
 
 
 
 
 
 
 
 
 
 
c593cde
4cb1652
bebc7ce
 
c593cde
bebc7ce
054da8d
c71c0cf
 
3eaba6e
 
 
 
 
 
 
c71c0cf
3eaba6e
 
c71c0cf
 
 
 
 
3eaba6e
c71c0cf
 
3eaba6e
 
 
 
 
 
 
c71c0cf
3eaba6e
 
c71c0cf
 
 
 
 
 
 
95c8547
 
 
 
 
 
 
 
 
 
 
 
 
 
c71c0cf
 
95c8547
 
c71c0cf
 
 
 
95c8547
 
c71c0cf
3eaba6e
054da8d
c71c0cf
3eaba6e
95c8547

import gradio as gr
import time
import pandas as pd
import asyncio
from uuid import uuid4
from gradio_client import Client, handle_file
from utils.whisp_api import handle_geojson_upload
from utils.retriever import retrieve_paragraphs
from utils.generator import generate
import json
import ast
from utils.logger import ChatLogger
from pathlib import Path
from huggingface_hub import CommitScheduler, HfApi
import os 

# Token for the log dataset repo, read from the environment (Space secret).
SPACES_LOG = os.getenv("EUDR_SPACES_LOG")
if not SPACES_LOG:
    raise ValueError("EUDR_SPACES_LOG not found in environment")

# Local folder holding the log files; each process gets its own uniquely
# named JSON file path inside it.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(exist_ok=True, parents=True)
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath(f"logs-{uuid4()}.json")

# The local log folder is pushed to the dataset repo periodically by the
# scheduler. Pattern taken from:
# https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
scheduler = CommitScheduler(
    token=SPACES_LOG,
    repo_type="dataset",
    repo_id="GIZ/spaces_logs",
    path_in_repo="eudr_chatbot",
    folder_path=JSON_DATASET_DIR,
)

# The chat logger shares the scheduler created above; no explicit
# scheduler.start() call is made in this file.
chat_logger = ChatLogger(scheduler=scheduler)
# Example questions shown in the "Ejemplos" tab, grouped by category.
# Insertion order matters: the first category is the one displayed initially.
SAMPLE_QUESTIONS = dict(
    [
        (
            "Análisis de la deforestación",
            [
                "¿Cuáles son los principales puntos críticos de deforestación en Ecuador?",
                "Muéstrame las tendencias de deforestación en el área cargada.",
                "¿Qué productos básicos están impulsando la deforestación en Guatemala?",
            ],
        ),
        (
            "Cumplimiento de la EUDR",
            [
                "¿Cuáles son los requisitos clave del EUDR para las importaciones de café?",
                "¿Cómo puedo demostrar que he actuado con la debida diligencia en mi cadena de suministro?",
                "¿Qué documentación se necesita para cumplir con la EUDR?",
            ],
        ),
        (
            "Evaluación de riesgos",
            [
                "¿Cuál es el nivel de riesgo de deforestación en esta región?",
                "¿Cómo evalúo los riesgos de la cadena de suministro?",
                "¿Cuáles son los plazos de cumplimiento?",
            ],
        ),
    ]
)


# Header prepended to every generated answer (Spanish, rendered as Markdown).
# Fixed spelling: "artificial" carries no accent.
BEGINNING_TEXT = "**Respuesta generada mediante inteligencia artificial:** \n\n" 
# Spanish disclaimer appended to answers shown in the chat.
DISCLAIMER_TEXT = "\n\n---\n ⚠️ **Descargo de responsabilidad:** El chatbot EUDR puede cometer errores. Verifique la información importante con fuentes oficiales. \n"

# Process-wide cache of GeoJSON analysis results, keyed by "<file name>_<size>",
# so the same upload is not sent to the analysis API twice.
geojson_analysis_cache = {}

# Begin a chat turn
def start_chat(query, history):
    """Append the new question to the history as a pending turn.

    Returns three values: an update that locks the input box, an update
    that selects the tab with id 1, and the extended history whose last
    entry is ``(query, None)``.
    """
    pending_turn = (query, None)
    extended_history = history + [pending_turn]
    lock_input = gr.update(interactive=False)
    select_tab = gr.update(selected=1)
    return lock_input, select_tab, extended_history

def finish_chat():
    """Return an update that clears the input box and makes it editable again."""
    unlocked_and_cleared = gr.update(value="", interactive=True)
    return unlocked_and_cleared


def make_html_source(source,i):
    """
    Render one retrieved document as an HTML card for the "source" side tab.

    Args:
        source: mapping with the paragraph text under ``'answer'`` and a
            metadata mapping under ``'answer_metadata'`` that provides
            ``'filename'`` and ``'page'``.
        i: number shown in the card title and used in the card's ``id``.

    Returns:
        The HTML snippet for the card as a string.

    The paragraph text and the filename are HTML-escaped before being
    interpolated, so characters such as ``<``, ``&`` or quotes coming from
    the documents cannot break the card markup.
    """
    import html  # local import keeps this block self-contained

    meta = source['answer_metadata']
    # Body text: escape markup characters but leave quotes readable.
    content = html.escape(source['answer'].strip(), quote=False)
    # The filename is also used inside an attribute value, so quotes are escaped too.
    name = html.escape(str(meta['filename']), quote=True)
    page = int(meta['page'])

    card = f"""
        <div class="card" id="doc{i}">
            <div class="card-content">
                <h2>Doc {i} - {name} - Page {page}</h2>
                <p>{content}</p>
            </div>
            <div class="card-footer">
                <span>{name}</span>
                <a href="{name}#page={page}" target="_blank" class="pdf-link">
                    <span role="img" aria-label="Open PDF">🔗</span>
                </a>
            </div>
        </div>
        """

    return card

async def chat_response(query, history, method, country, uploaded_file, request=None):
    """Generate the chat response for the latest turn and stream it to the UI.

    This is an async generator. Every yielded item is a pair
    ``(history, sources_html)`` where the last entry of ``history`` holds the
    part of the answer revealed so far.

    Args:
        query: text of the user's question.
        history: chat history; its last entry is overwritten in place with
            ``(query, <partial answer>)``.
        method: selected data source; ``"Subir GeoJson"`` selects the GeoJSON
            analysis, any other value selects document retrieval.
        country: country filter forwarded to ``retrieve_paragraphs``.
        uploaded_file: the uploaded GeoJSON file object, or ``None``.
        request: forwarded to ``chat_logger.log``.
            NOTE(review): the event wiring in this file never supplies it,
            so it is ``None`` there — confirm whether a request object was
            intended.

    Errors raised while analysing the file or retrieving/generating an
    answer are caught and reported to the user as the answer text.
    """

    # Skip processing if this is an auto-generated file analysis message
    if query.startswith("📄 GeoJSON cargado"):
        return

    geojson_mode = method == "Subir GeoJson"

    # Validate inputs
    if geojson_mode and uploaded_file is None:
        warning_message = "⚠️ **No se ha cargado ningún GeoJSON.** Por favor, carga primero un GeoJSON."
        history[-1] = (query, warning_message)
        yield history, ""
        return

    # Must be bound on every path: the streaming loop below yields it even
    # for the GeoJSON branch and when retrieval fails (previously that
    # raised UnboundLocalError).
    docs_html = ""

    # Handle GeoJSON upload → use cached results
    if geojson_mode and uploaded_file:
        try:
            # Same key format as auto_analyze_file so both share cache entries
            file_key = f"{uploaded_file.name}_{uploaded_file.size if hasattr(uploaded_file, 'size') else 'unknown'}"

            if file_key not in geojson_analysis_cache:
                # Call API and cache results
                stats_result = handle_geojson_upload(uploaded_file)
                geojson_analysis_cache[file_key] = stats_result[0]

            response = geojson_analysis_cache[file_key]
            # Same sources label that auto_analyze_file shows for an analysis
            docs_html = "**Sources:** WhispAPI Analysis Results"

        except Exception as e:
            response = f"Error processing file: {str(e)}"

    # Handle "Talk to Reports"
    else:
        try:
            retrieved_paragraphs = retrieve_paragraphs(query, country)
            # The retriever returns a string holding a Python literal
            # (a list of dicts); literal_eval parses it without executing code.
            context_retrieved = ast.literal_eval(retrieved_paragraphs)
            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]

            # One HTML card per retrieved paragraph, numbered from 1
            docs_html = "".join(
                make_html_source(doc, position)
                for position, doc in enumerate(context_retrieved, 1)
            )
            response = await generate(query=query, context=retrieved_paragraphs)

            # Log the interaction
            chat_logger.log(
                query=query,
                answer=response,
                retrieved_content=context_retrieved_lst,
                request=request
            )

        except Exception as e:
            response = f"Error retrieving information: {str(e)}"

    # Add header and disclaimer to the response
    response_with_disclaimer = BEGINNING_TEXT + response + DISCLAIMER_TEXT

    # Reveal the answer one character at a time for a typing effect
    displayed_response = ""
    for i, char in enumerate(response_with_disclaimer):
        displayed_response += char
        history[-1] = (query, displayed_response)
        yield history, docs_html
        # Only add delay every few characters to avoid being too slow
        if i % 3 == 0:  # Adjust this number to control speed
            await asyncio.sleep(0.02)
 


def auto_analyze_file(file, history):
    """Analyse a freshly uploaded GeoJSON file and post the result to the chat.

    Returns ``(history, sources_text)``. With no file the history comes back
    untouched and the sources text is empty. On success a new turn with the
    analysis (plus disclaimer) is appended; on failure a turn carrying the
    error message is appended instead.
    """
    # Nothing uploaded (or the upload was cleared): leave the chat as it is.
    if file is None:
        return history, ""

    try:
        # Cache key: file name plus size when the object exposes one.
        name_part = file.name
        size_part = getattr(file, 'size', 'unknown')
        cache_key = f"{name_part}_{size_part}"

        # Hit the analysis API only for files not seen before.
        if cache_key not in geojson_analysis_cache:
            api_result = handle_geojson_upload(file)
            geojson_analysis_cache[cache_key] = api_result[0]

        # Post the analysis straight into the chat as a complete turn.
        answer_text = geojson_analysis_cache[cache_key] + DISCLAIMER_TEXT
        new_turn = ("📄 Análisis del GeoJSON cargado", answer_text)
        return history + [new_turn], "**Sources:** WhispAPI Analysis Results"

    except Exception as e:
        failure_turn = (
            "📄 Error en análisis GeoJSON",
            f"❌ Error processing GeoJSON file: {str(e)}",
        )
        return history + [failure_turn], ""

def toggle_search_method(method):
    """Show the panel that belongs to the chosen data source.

    Returns updates for, in order: the GeoJSON section, the reports section
    and the country dropdown. Choosing "Subir GeoJson" shows the upload
    section and clears the country; any other choice shows the reports
    section and leaves the country untouched.
    """
    geojson_chosen = method == "Subir GeoJson"

    if geojson_chosen:
        country_update = gr.update(value=None)  # reset dropdown_country
    else:
        country_update = gr.update()            # keep dropdown_country as is

    return (
        gr.update(visible=geojson_chosen),      # geojson_section
        gr.update(visible=not geojson_chosen),  # reports_section
        country_update,                         # dropdown_country
    )

def change_sample_questions(key):
    """Return one visibility update per category, showing only ``key``'s group.

    Raises ValueError when ``key`` is not a category of SAMPLE_QUESTIONS.
    """
    categories = list(SAMPLE_QUESTIONS)
    chosen = categories.index(key)
    return [
        gr.update(visible=(position == chosen))
        for position in range(len(categories))
    ]

# Gradio theme: green/blue palette, small text, Poppins with generic fallbacks
theme = gr.themes.Base(
    text_size=gr.themes.utils.sizes.text_sm,
    font=[
        gr.themes.GoogleFont("Poppins"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
    secondary_hue="blue",
    primary_hue="green",
)

# Custom CSS for DataFrame styling: font sizes and cell padding for elements
# with the "dataframe" class, plus optional small/medium/large variants.
# NOTE(review): this string is not referenced anywhere in the visible code —
# the Blocks app below is created with css="style.css" — confirm whether it
# should be passed to the app or removed.
custom_css = """
/* DataFrame text sizing - Modify these values to change text size */
.dataframe table {
    font-size: 12px !important;  /* Change this value (e.g., 10px, 14px, 16px) */
}

.dataframe th {
    font-size: 13px !important;  /* Header text size */
    font-weight: 600 !important;
}

.dataframe td {
    font-size: 12px !important;  /* Cell text size */
    padding: 8px !important;     /* Cell padding */
}

/* Alternative size classes - change elem_classes="dataframe-small" in DataFrame component */
.dataframe-small table { font-size: 10px !important; }
.dataframe-small th { font-size: 11px !important; }
.dataframe-small td { font-size: 10px !important; }

.dataframe-medium table { font-size: 14px !important; }
.dataframe-medium th { font-size: 15px !important; }
.dataframe-medium td { font-size: 14px !important; }

.dataframe-large table { font-size: 16px !important; }
.dataframe-large th { font-size: 17px !important; }
.dataframe-large td { font-size: 16px !important; }
"""

# Welcome message shown as the first assistant turn in the chat (Spanish,
# Markdown). The tab name quoted in the text matches the actual tab label
# "Exención de responsabilidad" (spelling fixed from "responsibilidad").
init_prompt = """
        Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.

        💡 **Cómo utilizarlo (panel a la derecha)**

        **Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.
        **Ejemplos:** seleccione entre preguntas de ejemplo seleccionadas de diferentes categorías.
        **Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.

        ⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsabilidad».
        ⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios.

        """

with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
    
    # Main Chat Interface
    with gr.Tab("EUDR Bot"):
        with gr.Row():
            # Chat side of the layout (scale 2 against the control panel's 1)
            with gr.Column(scale=2):
                # Conversation view, seeded with the welcome message
                chatbot = gr.Chatbot(
                    height="auto",
                    layout="panel",
                    show_label=False,
                    show_copy_button=True,
                    avatar_images=(None, "chatbot_icon_2.png"),
                    value=[(None, init_prompt)],
                )

                # Feedback widgets: hidden until an answer has been produced
                with gr.Column():
                    with gr.Row(visible=False) as feedback_row:
                        gr.Markdown("¿Te ha sido útil esta respuesta?")
                        with gr.Row():
                            okay_btn = gr.Button("👍 De acuerdo", size="sm")
                            not_okay_btn = gr.Button("👎 No según lo esperado", size="sm")
                    feedback_thanks = gr.Markdown(
                        "Gracias por los comentarios.",
                        visible=False,
                    )

                # Single-line question box
                with gr.Row():
                    textbox = gr.Textbox(
                        lines=1,
                        scale=7,
                        interactive=True,
                        show_label=False,
                        placeholder="¡Pregúntame cualquier cosa sobre el cumplimiento de la normativa EUDR o sube tu GeoJSON para su análisis!",
                    )
            
            # Right column - Controls and tabs (1/3 width)
            with gr.Column(scale=1, variant="panel"):
                with gr.Tabs() as tabs:
                    
                    # "Modo de uso" tab: choose the data source for answers
                    with gr.Tab("Modo de uso", id=2):
                        search_method = gr.Radio(
                            value="Hablar con documentos",
                            choices=["Hablar con documentos", "Subir GeoJson"],
                            info="Sube un GeoJSON para su análisis o selecciona informes EUDR específicos de cada país.",
                            label="Elija una fuente de datos",
                        )

                        # Upload widgets, shown only in GeoJSON mode
                        with gr.Group(visible=False) as geojson_section:
                            uploaded_file = gr.File(
                                file_count="single",
                                file_types=[".geojson", ".json"],
                                label="Subir GeoJson",
                            )
                            upload_status = gr.Markdown("", visible=False)

                            # Hidden table reserved for the WHISP API response
                            results_table = gr.DataFrame(
                                elem_classes="dataframe",
                                wrap=True,
                                interactive=False,
                                visible=False,
                                label="Resultados del análisis",
                            )

                        # Country filter, shown in document mode (the default)
                        with gr.Group(visible=True) as reports_section:
                            dropdown_country = gr.Dropdown(
                                ["Ecuador", "Guatemala"],
                                interactive=True,
                                value=None,
                                label="Selecciona país",
                            )
                    
                    # Examples Tab
                    with gr.Tab("Ejemplos", id=0):
                        # Hidden textbox that receives the clicked example;
                        # its change event starts the chat flow.
                        examples_hidden = gr.Textbox(visible=False)

                        sample_categories = list(SAMPLE_QUESTIONS)
                        first_key = sample_categories[0]
                        dropdown_samples = gr.Dropdown(
                            sample_categories,
                            value=first_key,
                            interactive=True,
                            show_label=True,
                            label="Seleccione un ejemplo de pregunta."
                        )

                        # One row of examples per category; only the first
                        # category starts out visible.
                        sample_groups = []
                        for i, (key, questions) in enumerate(SAMPLE_QUESTIONS.items()):
                            with gr.Row(visible=(i == 0)) as group_examples:
                                gr.Examples(
                                    questions,
                                    [examples_hidden],
                                    examples_per_page=8,
                                    run_on_click=False,
                                )
                            sample_groups.append(group_examples)

                        # Switch the visible example group when another
                        # category is picked. Without this listener the
                        # dropdown had no effect and only the first
                        # category's examples could ever be seen.
                        dropdown_samples.change(
                            fn=change_sample_questions,
                            inputs=[dropdown_samples],
                            outputs=sample_groups,
                        )
                    
                    # "Referencia" tab: HTML area that displays the source cards
                    with gr.Tab("Referencia", elem_id="sources-textbox", id=1):
                        sources_textbox = gr.HTML(
                            value="Los documentos originales aparecerán aquí después de que haga una pregunta...",
                            show_label=False,
                        )
    
    # Guidelines Tab: static Markdown with tips on asking questions and on
    # the two data sources.
    # NOTE(review): this text is in English while the rest of the interface
    # is in Spanish — confirm whether it should be translated.
    with gr.Tab("Directrices"):
        gr.Markdown("""
        #### Welcome to EUDR Q&A! 
        
        This AI-powered assistant helps you understand EU Deforestation Regulation compliance and analyze geographic data.
        
        ## 💬 How to Ask Effective Questions
        
        | ❌ Less Effective | ✅ More Effective |
        |------------------|-------------------|
        | "What is deforestation?" | "What are the main deforestation hotspots in Ecuador?" |
        | "Tell me about compliance" | "What EUDR requirements apply to coffee imports from Guatemala?" |
        | "Show me data" | "What is the deforestation rate in the uploaded region?" |
        
        ## 🔍 Using Data Sources
        
        **Upload GeoJSON:** Upload your geographic data files for automatic analysis via WHISP API  
        **Talk to Reports:** Select Ecuador or Guatemala for country-specific EUDR analysis
        
        ## ⭐ Best Practices
        
        - Be specific about regions, commodities, or time periods
        - Ask one question at a time for clearer answers
        - Use follow-up questions to explore topics deeper
        - Provide context when possible
        """)
    
    # About Tab: static Markdown describing the regulation, the purpose of
    # the tool and its key features.
    # NOTE(review): this text is in English while the rest of the interface
    # is in Spanish — confirm whether it should be translated.
    with gr.Tab("Información"):
        gr.Markdown("""
        ## About EUDR Q&A
        
        The **EU Deforestation Regulation (EUDR)** requires companies to ensure that specific commodities 
        placed on the EU market are deforestation-free and legally produced.
        
        This AI-powered tool helps stakeholders:
        - Understand EUDR compliance requirements
        - Analyze geographic deforestation data using WHISP API
        - Assess supply chain risks
        - Navigate complex regulatory landscapes
        
        **Developed by GIZ** to enhance accessibility and understanding of EUDR requirements 
        through advanced AI and geographic data processing capabilities.
        
        ### Key Features:
        - Automatic analysis of uploaded GeoJSON files via WHISP API
        - Country-specific EUDR compliance guidance
        - Real-time question answering with source citations
        - User-friendly interface for complex regulatory information
        """)
    
    # Disclaimer Tab: static Markdown covering scope, privacy, AI limitations
    # and data collection. Two typos in the displayed text are fixed:
    # the missing space in "enhancement. By using" and "it's not perfect".
    with gr.Tab("Exención de responsabilidad"):
        gr.Markdown("""
        ## Important Disclaimers
        
        ⚠️ **Scope & Limitations:**
        - This tool is designed for EUDR compliance assistance and geographic data analysis
        - Responses should not be considered official legal or compliance advice
        - Always consult qualified professionals for official compliance decisions
        
        ⚠️ **Data & Privacy:**
        - Uploaded GeoJSON files are processed via external WHISP API for analysis
        - We collect usage statistics to improve the tool
        - Files are processed temporarily and not permanently stored
        
        ⚠️ **AI Limitations:**
        - Responses are AI-generated and may contain inaccuracies
        - The tool is a prototype under continuous development
        - Always verify important information with authoritative sources
        
        **Data Collection:** We collect questions, answers, feedback, and anonymized usage statistics 
        to improve tool performance based on legitimate interest in service enhancement. By using this chatbot, you agree to these terms and acknowledge that you are solely responsible for any reliance on or actions taken based on its responses.
        
        **Technical Information:** User can read more about the technical information about the tool in [**Readme**](https://huggingface.co/spaces/GIZ/Asistente_EUDR/blob/main/README.md) of this tool.
        
        **This is just a prototype and being tested and worked upon, so it's not perfect and may sometimes give irrelevant answers**. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.   
        """)
    
    # Event Handlers

    # Toggle search method: swap the visible section for the chosen source
    search_method.change(
        fn=toggle_search_method,
        inputs=[search_method],
        outputs=[geojson_section, reports_section, dropdown_country]
    )

    # File upload - automatically analyze and display in chat
    uploaded_file.change(
        fn=auto_analyze_file,
        inputs=[uploaded_file, chatbot],
        outputs=[chatbot, sources_textbox],
        queue=False
    )

    def show_feedback_prompt():
        """Show the feedback buttons and hide the previous thank-you note.

        Hiding the note here keeps "Gracias por los comentarios." from
        staying on screen next to the prompt for a new answer.
        """
        return gr.update(visible=True), gr.update(visible=False)

    # Chat functionality. The same four-step chain runs for a typed question
    # (textbox.submit) and for a clicked example (examples_hidden.change);
    # only the component that holds the query differs.
    for trigger, query_box in (
        (textbox.submit, textbox),
        (examples_hidden.change, examples_hidden),
    ):
        trigger(
            start_chat,
            [query_box, chatbot],
            [textbox, tabs, chatbot],
            queue=False
        ).then(
            chat_response,
            [query_box, chatbot, search_method, dropdown_country, uploaded_file],
            [chatbot, sources_textbox]
        ).then(
            show_feedback_prompt,
            outputs=[feedback_row, feedback_thanks]
        ).then(
            finish_chat,
            outputs=[textbox]
        )

    # Feedback buttons
    def log_feedback(feedback, chatbot):
        """Log the user's rating for the most recent chat turn.

        Args:
            feedback: rating label, "positive" or "negative" as passed by
                the button handlers below.
            chatbot: current chat history; its last entry is unpacked as
                ``(query, response)``.

        Returns:
            Updates that hide the feedback row and show the thank-you note.
        """
        # Get the last interaction from chatbot history
        if chatbot:
            last_query, last_response = chatbot[-1]
            chat_logger.log(
                query=last_query,
                answer=last_response,
                retrieved_content=[],  # We don't have access to the original retrieved content here
                feedback=feedback
            )
        return (gr.update(visible=False), gr.update(visible=True))

    okay_btn.click(
        lambda history: log_feedback("positive", history),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks]
    )

    not_okay_btn.click(
        lambda history: log_feedback("negative", history),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks]
    )

# Launch the app only when this file is run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()