import ast
import asyncio
import os
from pathlib import Path
from uuid import uuid4

import gradio as gr
from huggingface_hub import CommitScheduler

from utils.whisp_api import handle_geojson_upload
from utils.retriever import retrieve_paragraphs
from utils.generator import generate
from utils.logger import ChatLogger

# fetch tokens from Gradio secrets
SPACES_LOG = os.environ.get("EUDR_SPACES_LOG")
if not SPACES_LOG:
raise ValueError("EUDR_SPACES_LOG not found in environment")
# create the local logs directory
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"
# the logs are written to dataset repo periodically from local logs
# https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
scheduler = CommitScheduler(
repo_id="GIZ/spaces_logs",
repo_type="dataset",
folder_path=JSON_DATASET_DIR,
path_in_repo="eudr_chatbot",
token=SPACES_LOG )
# Initialize the logger with the shared scheduler (CommitScheduler pushes the local
# logs to the dataset repo in the background; no explicit start call is needed)
chat_logger = ChatLogger(scheduler=scheduler)
# Sample questions for examples
SAMPLE_QUESTIONS = {
"Análisis de la deforestación": [
"¿Cuáles son los principales puntos críticos de deforestación en Ecuador?",
"Muéstrame las tendencias de deforestación en el área cargada.",
"¿Qué productos básicos están impulsando la deforestación en Guatemala?"
],
"Cumplimiento de la EUDR": [
"¿Cuáles son los requisitos clave del EUDR para las importaciones de café?",
"¿Cómo puedo demostrar que he actuado con la debida diligencia en mi cadena de suministro?",
"¿Qué documentación se necesita para cumplir con la EUDR?"
],
"Evaluación de riesgos": [
"¿Cuál es el nivel de riesgo de deforestación en esta región?",
"¿Cómo evalúo los riesgos de la cadena de suministro?",
"¿Cuáles son los plazos de cumplimiento?"
]
}
BEGINNING_TEXT = "**Respuesta generada mediante inteligencia artificial:** \n\n"
# Spanish disclaimer text
DISCLAIMER_TEXT = "\n\n---\n ⚠️ **Descargo de responsabilidad:** El chatbot EUDR puede cometer errores. Verifique la información importante con fuentes oficiales. \n"
# Global variable to cache API results and prevent double calls
geojson_analysis_cache = {}
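# The cache is keyed by file name and size and is shared between auto_analyze_file
# and chat_response, so each uploaded file triggers only one WHISP API call.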
# Initialize Chat
def start_chat(query, history):
"""Start a new chat interaction"""
history = history + [(query, None)]
return gr.update(interactive=False), gr.update(selected=1), history
def finish_chat():
"""Finish chat and reset input"""
return gr.update(interactive=True, value="")
def make_html_source(source, i):
    """
    Format a retrieved passage for display in the "Referencia" side tab.
    """
    meta = source['answer_metadata']
    content = source['answer'].strip()
    card = f"""
Doc {i} - {meta['filename']} - Page {int(meta['page'])}
{content}
"""
    return card
async def chat_response(query, history, method, country, uploaded_file, request: gr.Request = None):
    """Generate a chat response based on the selected method and inputs"""
    # Skip processing for auto-generated file-analysis messages
    # (auto_analyze_file prefixes those entries with the 📄 emoji)
    if query.startswith("📄"):
        return
    # Content for the sources panel; stays empty on the GeoJSON path
    docs_html = ""
# Validate inputs
if method == "Subir GeoJson":
if uploaded_file is None:
warning_message = "⚠️ **No se ha cargado ningún GeoJSON.** Por favor, carga primero un GeoJSON."
history[-1] = (query, warning_message)
yield history, ""
return
# Handle GeoJSON upload → use cached results
if method == "Subir GeoJson" and uploaded_file:
try:
# Check if we have cached results for this file
file_key = f"{uploaded_file.name}_{uploaded_file.size if hasattr(uploaded_file, 'size') else 'unknown'}"
if file_key in geojson_analysis_cache:
# Use cached results
response = geojson_analysis_cache[file_key]
else:
# Call API and cache results
stats_result = handle_geojson_upload(uploaded_file)
formatted_stats = stats_result[0]
geojson_analysis_cache[file_key] = formatted_stats
response = formatted_stats
except Exception as e:
response = f"Error processing file: {str(e)}"
# Handle "Talk to Reports"
else:
try:
retrieved_paragraphs = retrieve_paragraphs(query, country)
context_retrieved = ast.literal_eval(retrieved_paragraphs)
            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]
            # Build the sources panel from the retrieved passages
            docs_html = "".join(
                make_html_source(d, i) for i, d in enumerate(context_retrieved, 1)
            )
response = await generate(query=query, context=retrieved_paragraphs)
# Log the interaction
chat_logger.log(
query=query,
answer=response,
retrieved_content=context_retrieved_lst,
request=request
)
except Exception as e:
response = f"Error retrieving information: {str(e)}"
# Add disclaimer to the response
response_with_disclaimer = BEGINNING_TEXT + response + DISCLAIMER_TEXT
displayed_response = ""
for i, char in enumerate(response_with_disclaimer):
displayed_response += char
history[-1] = (query, displayed_response)
yield history, docs_html
# Only add delay every few characters to avoid being too slow
if i % 3 == 0: # Adjust this number to control speed
await asyncio.sleep(0.02)
def auto_analyze_file(file, history):
"""Automatically analyze uploaded GeoJSON file and add results to chat"""
if file is not None:
try:
# Call API immediately and cache results
file_key = f"{file.name}_{file.size if hasattr(file, 'size') else 'unknown'}"
if file_key not in geojson_analysis_cache:
stats_result = handle_geojson_upload(file)
formatted_stats = stats_result[0]
geojson_analysis_cache[file_key] = formatted_stats
# Add analysis results directly to chat (no intermediate message)
analysis_query = "📄 Análisis del GeoJSON cargado"
cached_result = geojson_analysis_cache[file_key] + DISCLAIMER_TEXT
# Add both query and response to history
history = history + [(analysis_query, cached_result)]
return history, "**Sources:** WhispAPI Analysis Results"
except Exception as e:
error_msg = f"❌ Error processing GeoJSON file: {str(e)}"
history = history + [("📄 Error en análisis GeoJSON", error_msg)]
return history, ""
return history, ""
def toggle_search_method(method):
"""Toggle between GeoJSON upload and country selection"""
if method == "Subir GeoJson":
return (
gr.update(visible=True), # geojson_section
gr.update(visible=False), # reports_section
gr.update(value=None), # dropdown_country
)
    else:  # "Hablar con documentos" (country reports)
return (
gr.update(visible=False), # geojson_section
gr.update(visible=True), # reports_section
gr.update(), # dropdown_country
)
def change_sample_questions(key):
"""Update visible examples based on selected category"""
keys = list(SAMPLE_QUESTIONS.keys())
index = keys.index(key)
visible_bools = [False] * len(keys)
visible_bools[index] = True
return [gr.update(visible=visible_bools[i]) for i in range(len(keys))]
# Set up Gradio Theme
theme = gr.themes.Base(
primary_hue="green",
secondary_hue="blue",
font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
text_size=gr.themes.utils.sizes.text_sm,
)
# Custom CSS for DataFrame styling
# NOTE: this string is not currently passed to gr.Blocks (which loads "style.css");
# merge these rules into style.css, or pass css=custom_css, for them to take effect.
custom_css = """
/* DataFrame text sizing - Modify these values to change text size */
.dataframe table {
font-size: 12px !important; /* Change this value (e.g., 10px, 14px, 16px) */
}
.dataframe th {
font-size: 13px !important; /* Header text size */
font-weight: 600 !important;
}
.dataframe td {
font-size: 12px !important; /* Cell text size */
padding: 8px !important; /* Cell padding */
}
/* Alternative size classes - change elem_classes="dataframe-small" in DataFrame component */
.dataframe-small table { font-size: 10px !important; }
.dataframe-small th { font-size: 11px !important; }
.dataframe-small td { font-size: 10px !important; }
.dataframe-medium table { font-size: 14px !important; }
.dataframe-medium th { font-size: 15px !important; }
.dataframe-medium td { font-size: 14px !important; }
.dataframe-large table { font-size: 16px !important; }
.dataframe-large th { font-size: 17px !important; }
.dataframe-large td { font-size: 16px !important; }
"""
init_prompt = """
Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.
💡 **Cómo utilizarlo (panel a la derecha)**
**Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.
**Ejemplos:** elija entre preguntas de ejemplo de distintas categorías.
**Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.
⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsabilidad».
⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios.
"""
with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
# Main Chat Interface
with gr.Tab("EUDR Bot"):
with gr.Row():
# Left column - Chat interface (2/3 width)
with gr.Column(scale=2):
chatbot = gr.Chatbot(
value=[(None, init_prompt)],
show_copy_button=True,
show_label=False,
layout="panel",
avatar_images=(None, "chatbot_icon_2.png"),
height="auto"
)
# Feedback UI
with gr.Column():
with gr.Row(visible=False) as feedback_row:
gr.Markdown("¿Te ha sido útil esta respuesta?")
with gr.Row():
okay_btn = gr.Button("👍 De acuerdo", size="sm")
not_okay_btn = gr.Button("👎 No según lo esperado", size="sm")
feedback_thanks = gr.Markdown("Gracias por los comentarios.", visible=False)
# Input textbox
with gr.Row():
textbox = gr.Textbox(
placeholder="¡Pregúntame cualquier cosa sobre el cumplimiento de la normativa EUDR o sube tu GeoJSON para su análisis!",
show_label=False,
scale=7,
lines=1,
interactive=True
)
# Right column - Controls and tabs (1/3 width)
with gr.Column(scale=1, variant="panel"):
with gr.Tabs() as tabs:
# Data Sources Tab
with gr.Tab("Modo de uso", id=2):
search_method = gr.Radio(
choices=["Hablar con documentos", "Subir GeoJson"],
label="Elija una fuente de datos",
info="Sube un GeoJSON para su análisis o selecciona informes EUDR específicos de cada país.",
value="Hablar con documentos",
)
# GeoJSON Upload Section
with gr.Group(visible=False) as geojson_section:
uploaded_file = gr.File(
label="Subir GeoJson",
file_types=[".geojson", ".json"],
file_count="single"
)
upload_status = gr.Markdown("", visible=False)
# Results table for WHISP API response
results_table = gr.DataFrame(
label="Resultados del análisis",
visible=False,
interactive=False,
wrap=True,
elem_classes="dataframe"
)
# Talk to Reports Section
with gr.Group(visible=True) as reports_section:
dropdown_country = gr.Dropdown(
["Ecuador", "Guatemala"],
label="Selecciona país",
value=None,
interactive=True,
)
# Examples Tab
with gr.Tab("Ejemplos", id=0):
examples_hidden = gr.Textbox(visible=False)
first_key = list(SAMPLE_QUESTIONS.keys())[0]
dropdown_samples = gr.Dropdown(
SAMPLE_QUESTIONS.keys(),
value=first_key,
interactive=True,
show_label=True,
label="Seleccione un ejemplo de pregunta."
)
# Create example sections
sample_groups = []
for i, (key, questions) in enumerate(SAMPLE_QUESTIONS.items()):
examples_visible = True if i == 0 else False
with gr.Row(visible=examples_visible) as group_examples:
gr.Examples(
questions,
[examples_hidden],
examples_per_page=8,
run_on_click=False,
)
sample_groups.append(group_examples)
# Sources Tab
with gr.Tab("Referencia", id=1, elem_id="sources-textbox"):
sources_textbox = gr.HTML(
show_label=False,
value="Los documentos originales aparecerán aquí después de que haga una pregunta..."
)
# Guidelines Tab
with gr.Tab("Directrices"):
gr.Markdown("""
#### Welcome to EUDR Q&A!
This AI-powered assistant helps you understand EU Deforestation Regulation compliance and analyze geographic data.
## 💬 How to Ask Effective Questions
| ❌ Less Effective | ✅ More Effective |
|------------------|-------------------|
| "What is deforestation?" | "What are the main deforestation hotspots in Ecuador?" |
| "Tell me about compliance" | "What EUDR requirements apply to coffee imports from Guatemala?" |
| "Show me data" | "What is the deforestation rate in the uploaded region?" |
## 🔍 Using Data Sources
**Upload GeoJSON:** Upload your geographic data files for automatic analysis via WHISP API
**Talk to Reports:** Select Ecuador or Guatemala for country-specific EUDR analysis
## ⭐ Best Practices
- Be specific about regions, commodities, or time periods
- Ask one question at a time for clearer answers
- Use follow-up questions to explore topics deeper
- Provide context when possible
""")
# About Tab
with gr.Tab("Información"):
gr.Markdown("""
## About EUDR Q&A
The **EU Deforestation Regulation (EUDR)** requires companies to ensure that specific commodities
placed on the EU market are deforestation-free and legally produced.
This AI-powered tool helps stakeholders:
- Understand EUDR compliance requirements
- Analyze geographic deforestation data using WHISP API
- Assess supply chain risks
- Navigate complex regulatory landscapes
**Developed by GIZ** to enhance accessibility and understanding of EUDR requirements
through advanced AI and geographic data processing capabilities.
### Key Features:
- Automatic analysis of uploaded GeoJSON files via WHISP API
- Country-specific EUDR compliance guidance
- Real-time question answering with source citations
- User-friendly interface for complex regulatory information
""")
# Disclaimer Tab
with gr.Tab("Exención de responsabilidad"):
gr.Markdown("""
## Important Disclaimers
⚠️ **Scope & Limitations:**
- This tool is designed for EUDR compliance assistance and geographic data analysis
- Responses should not be considered official legal or compliance advice
- Always consult qualified professionals for official compliance decisions
⚠️ **Data & Privacy:**
- Uploaded GeoJSON files are processed via external WHISP API for analysis
- We collect usage statistics to improve the tool
- Files are processed temporarily and not permanently stored
⚠️ **AI Limitations:**
- Responses are AI-generated and may contain inaccuracies
- The tool is a prototype under continuous development
- Always verify important information with authoritative sources
**Data Collection:** We collect questions, answers, feedback, and anonymized usage statistics to improve tool performance, based on our legitimate interest in enhancing the service. By using this chatbot, you agree to these terms and acknowledge that you are solely responsible for any reliance on or actions taken based on its responses.
**Technical Information:** More technical details about the tool are available in its [**Readme**](https://huggingface.co/spaces/GIZ/Asistente_EUDR/blob/main/README.md).
**This is a prototype that is still being tested and improved, so it is not perfect and may sometimes give irrelevant answers.** If you are not satisfied with an answer, please ask a more specific question or report your feedback to help us improve the system.
""")
# Event Handlers
# Toggle search method
search_method.change(
fn=toggle_search_method,
inputs=[search_method],
outputs=[geojson_section, reports_section, dropdown_country]
)
# File upload - automatically analyze and display in chat (SIMPLIFIED)
uploaded_file.change(
fn=auto_analyze_file,
inputs=[uploaded_file, chatbot],
outputs=[chatbot, sources_textbox],
queue=False
)
# Chat functionality
textbox.submit(
start_chat,
[textbox, chatbot],
[textbox, tabs, chatbot],
queue=False
).then(
chat_response,
[textbox, chatbot, search_method, dropdown_country, uploaded_file],
[chatbot, sources_textbox]
).then(
lambda: gr.update(visible=True),
outputs=[feedback_row]
).then(
finish_chat,
outputs=[textbox]
)
# Examples functionality
examples_hidden.change(
start_chat,
[examples_hidden, chatbot],
[textbox, tabs, chatbot],
queue=False
).then(
chat_response,
[examples_hidden, chatbot, search_method, dropdown_country, uploaded_file],
[chatbot, sources_textbox]
).then(
lambda: gr.update(visible=True),
outputs=[feedback_row]
).then(
finish_chat,
outputs=[textbox]
)
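    # Hook the category dropdown up to the example groups. change_sample_questions and
    # sample_groups are defined above but were never wired to an event, so this
    # connection is assumed to be the intended behavior.
    dropdown_samples.change(
        change_sample_questions,
        inputs=[dropdown_samples],
        outputs=sample_groups,
        queue=False
    )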
    # Feedback logging
def log_feedback(feedback, chatbot):
# Get the last interaction from chatbot history
if chatbot and len(chatbot) > 0:
last_query, last_response = chatbot[-1]
chat_logger.log(
query=last_query,
answer=last_response,
retrieved_content=[], # We don't have access to the original retrieved content here
feedback=feedback
)
return (gr.update(visible=False), gr.update(visible=True))
# Feedback buttons
okay_btn.click(
lambda chatbot: log_feedback("positive", chatbot),
inputs=[chatbot],
outputs=[feedback_row, feedback_thanks]
)
not_okay_btn.click(
lambda chatbot: log_feedback("negative", chatbot),
inputs=[chatbot],
outputs=[feedback_row, feedback_thanks]
)
# Launch the app
if __name__ == "__main__":
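    # Enable the request queue explicitly; recent Gradio versions do this by default,
    # but generator (streaming) handlers require it on older releases.
    demo.queue()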
demo.launch()