# Captured from a Hugging Face Space page (status banner at capture time: "Running on CPU Upgrade").
import ast
import asyncio
import html
import json
import os
import time
from pathlib import Path
from uuid import uuid4

import gradio as gr
import pandas as pd
from gradio_client import Client, handle_file
from huggingface_hub import CommitScheduler, HfApi

from utils.generator import generate
from utils.logger import ChatLogger
from utils.retriever import retrieve_paragraphs
from utils.whisp_api import handle_geojson_upload
# Token for the logs dataset repo, read from the environment (Space secrets).
# The app refuses to start without it.
SPACES_LOG = os.getenv("EUDR_SPACES_LOG")
if not SPACES_LOG:
    raise ValueError("EUDR_SPACES_LOG not found in environment")

# Local folder holding the log files; each process gets its own uniquely
# named JSON file inside it.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath(f"logs-{uuid4()}.json")

# The local log folder is pushed to the dataset repo by the scheduler.
# Pattern reference: https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
# NOTE(review): an explicit `scheduler.start()` call was commented out here;
# presumably CommitScheduler begins its work on construction - confirm.
scheduler = CommitScheduler(
    repo_id="GIZ/spaces_logs",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="eudr_chatbot",
    token=SPACES_LOG,
)

# Interaction logger sharing the scheduler above.
chat_logger = ChatLogger(scheduler=scheduler)
# Example questions offered in the "Ejemplos" tab, grouped by category.
# Keys are the category labels shown in the dropdown; values are the questions.
SAMPLE_QUESTIONS = {
    "Análisis de la deforestación": [
        "¿Cuáles son los principales puntos críticos de deforestación en Ecuador?",
        "Muéstrame las tendencias de deforestación en el área cargada.",
        "¿Qué productos básicos están impulsando la deforestación en Guatemala?",
    ],
    "Cumplimiento de la EUDR": [
        "¿Cuáles son los requisitos clave del EUDR para las importaciones de café?",
        "¿Cómo puedo demostrar que he actuado con la debida diligencia en mi cadena de suministro?",
        "¿Qué documentación se necesita para cumplir con la EUDR?",
    ],
    "Evaluación de riesgos": [
        "¿Cuál es el nivel de riesgo de deforestación en esta región?",
        "¿Cómo evalúo los riesgos de la cadena de suministro?",
        "¿Cuáles son los plazos de cumplimiento?",
    ],
}

# Prefix prepended to every generated answer (Spanish, user-facing).
# Fixed spelling: "artificial" carries no accent.
BEGINNING_TEXT = "**Respuesta generada mediante inteligencia artificial:** \n\n"

# Spanish disclaimer text appended to every answer.
DISCLAIMER_TEXT = "\n\n---\n ⚠️ **Descargo de responsabilidad:** El chatbot EUDR puede cometer errores. Verifique la información importante con fuentes oficiales. \n"

# Module-level cache of GeoJSON analysis results, keyed by "<file name>_<file size>",
# so the analysis API is not called twice for the same upload.
geojson_analysis_cache = {}
# Chat lifecycle: first step of every submit chain
def start_chat(query, history):
    """Open a new chat turn.

    Appends ``(query, None)`` to a copy of ``history`` (the answer slot is
    filled later) and returns three values in the order the event wiring
    expects: an update that makes the input non-interactive, an update that
    selects the tab with id 1, and the extended history.
    """
    pending_turn = (query, None)
    extended_history = history + [pending_turn]
    lock_input = gr.update(interactive=False)
    select_tab = gr.update(selected=1)
    return lock_input, select_tab, extended_history
def finish_chat():
    """Close a chat turn: return an update that re-enables and empties the input."""
    reset_input = gr.update(interactive=True, value="")
    return reset_input
def make_html_source(source, i):
    """Render one retrieved passage as an HTML card for the sources tab.

    Args:
        source: Mapping with an ``'answer'`` string (the passage text) and an
            ``'answer_metadata'`` mapping providing ``'filename'`` and ``'page'``.
        i: Number of the document, used in the card id and heading.

    Returns:
        The HTML snippet for the card.

    Raises:
        KeyError: If one of the expected keys is missing.
        ValueError / TypeError: If the page value cannot be converted to ``int``.

    The passage text and the filename are HTML-escaped before interpolation:
    they come from retrieved documents, and raw ``<``, ``&`` or quotes would
    otherwise break the card markup or inject markup into the page.
    """
    meta = source['answer_metadata']
    filename = html.escape(str(meta['filename']), quote=True)
    page = int(meta['page'])
    content = html.escape(source['answer'].strip())

    return f"""
<div class="card" id="doc{i}">
    <div class="card-content">
        <h2>Doc {i} - {filename} - Page {page}</h2>
        <p>{content}</p>
    </div>
    <div class="card-footer">
        <span>{filename}</span>
        <a href="{filename}#page={page}" target="_blank" class="pdf-link">
            <span role="img" aria-label="Open PDF">🔗</span>
        </a>
    </div>
</div>
"""
async def chat_response(query, history, method, country, uploaded_file, request=None):
    """Generate the answer for the latest chat turn and stream it to the UI.

    This is an async generator. Each yielded item is ``(history, sources_html)``,
    where the last entry of ``history`` is rewritten in place with the portion
    of the answer revealed so far.

    Args:
        query: The user's question.
        history: Chat history as a list of ``(user, bot)`` pairs; the last
            entry is the pending turn to fill in.
        method: Selected data source; ``"Subir GeoJson"`` selects the GeoJSON
            analysis path, any other value the document retrieval path.
        country: Country filter passed to ``retrieve_paragraphs``.
        uploaded_file: Uploaded file object, or ``None``.
        request: Passed through to ``chat_logger.log``.
            NOTE(review): the visible event wiring never supplies this
            argument, so it appears to always be ``None`` - confirm.

    Failures of the analysis or retrieval step are reported as the answer text
    instead of being raised.
    """
    # Skip processing if this is an auto-generated file analysis message
    if query.startswith("📄 GeoJSON cargado"):
        return

    # GeoJSON mode needs an upload; tell the user instead of falling through.
    if method == "Subir GeoJson" and uploaded_file is None:
        warning_message = "⚠️ **No se ha cargado ningún GeoJSON.** Por favor, carga primero un GeoJSON."
        history[-1] = (query, warning_message)
        yield history, ""
        return

    # Bound before branching: the GeoJSON path produces no source cards and the
    # retrieval path may fail before building them. Without this default the
    # streaming loop below raised UnboundLocalError in both situations.
    docs_html = ""

    if method == "Subir GeoJson" and uploaded_file:
        # GeoJSON upload: reuse the cached analysis when this file was seen before.
        try:
            file_key = f"{uploaded_file.name}_{getattr(uploaded_file, 'size', 'unknown')}"
            if file_key not in geojson_analysis_cache:
                stats_result = handle_geojson_upload(uploaded_file)
                geojson_analysis_cache[file_key] = stats_result[0]
            response = geojson_analysis_cache[file_key]
        except Exception as e:
            response = f"Error processing file: {e}"
    else:
        # Document mode: retrieve passages, build source cards, generate the answer.
        try:
            retrieved_paragraphs = retrieve_paragraphs(query, country)
            # The retriever result is parsed as a Python literal (a sequence of
            # dicts with an 'answer' key, as used below).
            context_retrieved = ast.literal_eval(retrieved_paragraphs)
            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]

            # Assigned only once fully built, so a failure while rendering the
            # cards leaves the empty default rather than a partial list.
            docs_html = "".join(
                make_html_source(doc, idx)
                for idx, doc in enumerate(context_retrieved, 1)
            )

            response = await generate(query=query, context=retrieved_paragraphs)

            # Log the interaction
            chat_logger.log(
                query=query,
                answer=response,
                retrieved_content=context_retrieved_lst,
                request=request,
            )
        except Exception as e:
            response = f"Error retrieving information: {e}"

    # Wrap the answer with the AI notice and the disclaimer.
    response_with_disclaimer = BEGINNING_TEXT + response + DISCLAIMER_TEXT

    # Reveal the answer one character at a time to simulate typing.
    for i in range(len(response_with_disclaimer)):
        history[-1] = (query, response_with_disclaimer[:i + 1])
        yield history, docs_html
        # Only pause every third character to avoid being too slow
        if i % 3 == 0:  # Adjust this number to control speed
            await asyncio.sleep(0.02)
def auto_analyze_file(file, history):
    """Analyze a freshly uploaded GeoJSON file and post the result to the chat.

    Returns ``(history, sources_text)``. With no file, the history is returned
    untouched and the sources text is empty. Otherwise the analysis (served
    from the module cache when available) is appended as a new chat turn; any
    failure is appended as an error turn instead of being raised.
    """
    if file is None:
        return history, ""

    try:
        # Same cache key scheme as the chat handler: "<name>_<size or 'unknown'>".
        cache_key = f"{file.name}_{getattr(file, 'size', 'unknown')}"
        if cache_key not in geojson_analysis_cache:
            # Not analyzed yet: call the API now and remember the formatted stats.
            api_result = handle_geojson_upload(file)
            geojson_analysis_cache[cache_key] = api_result[0]

        # Post the analysis directly as a complete (question, answer) turn.
        analysis_turn = (
            "📄 Análisis del GeoJSON cargado",
            geojson_analysis_cache[cache_key] + DISCLAIMER_TEXT,
        )
        history = history + [analysis_turn]
        return history, "**Sources:** WhispAPI Analysis Results"
    except Exception as err:
        failure_turn = (
            "📄 Error en análisis GeoJSON",
            f"❌ Error processing GeoJSON file: {err}",
        )
        history = history + [failure_turn]
        return history, ""
def toggle_search_method(method):
    """Switch the side panel between GeoJSON upload and country selection.

    Returns three updates, in order: visibility of the GeoJSON section,
    visibility of the reports section, and the country dropdown (cleared when
    GeoJSON mode is chosen, left untouched otherwise).
    """
    geojson_mode = method == "Subir GeoJson"

    geojson_section_update = gr.update(visible=geojson_mode)
    reports_section_update = gr.update(visible=not geojson_mode)
    if geojson_mode:
        country_update = gr.update(value=None)
    else:
        country_update = gr.update()

    return geojson_section_update, reports_section_update, country_update
def change_sample_questions(key):
    """Return one visibility update per example category, showing only ``key``.

    The updates follow the order of the categories in ``SAMPLE_QUESTIONS``.
    Raises ``ValueError`` when ``key`` is not one of the categories.
    """
    categories = list(SAMPLE_QUESTIONS)
    selected = categories.index(key)
    return [
        gr.update(visible=(position == selected))
        for position in range(len(categories))
    ]
# Gradio theme: Base theme with green/blue hues, Poppins first in the font
# stack, and the small text size preset.
_theme_fonts = [
    gr.themes.GoogleFont("Poppins"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
theme = gr.themes.Base(
    primary_hue="green",
    secondary_hue="blue",
    font=_theme_fonts,
    text_size=gr.themes.utils.sizes.text_sm,
)
# CSS rules for DataFrame text sizing. Components opt in through elem_classes:
# "dataframe" (default sizes) or the "dataframe-small" / "-medium" / "-large" variants.
# NOTE(review): this string is not referenced by the gr.Blocks(...) call visible
# in this file, which passes css="style.css" - confirm whether it is still needed.
custom_css = """
/* DataFrame text sizing - Modify these values to change text size */
.dataframe table {
    font-size: 12px !important; /* Change this value (e.g., 10px, 14px, 16px) */
}
.dataframe th {
    font-size: 13px !important; /* Header text size */
    font-weight: 600 !important;
}
.dataframe td {
    font-size: 12px !important; /* Cell text size */
    padding: 8px !important; /* Cell padding */
}
/* Alternative size classes - change elem_classes="dataframe-small" in DataFrame component */
.dataframe-small table { font-size: 10px !important; }
.dataframe-small th { font-size: 11px !important; }
.dataframe-small td { font-size: 10px !important; }
.dataframe-medium table { font-size: 14px !important; }
.dataframe-medium th { font-size: 15px !important; }
.dataframe-medium td { font-size: 14px !important; }
.dataframe-large table { font-size: 16px !important; }
.dataframe-large th { font-size: 17px !important; }
.dataframe-large td { font-size: 16px !important; }
"""
# Greeting shown as the first bot message (Spanish, user-facing Markdown).
# Fixed the misspelling "responsibilidad" so the tab name quoted here matches
# the actual tab label "Exención de responsabilidad".
init_prompt = """
Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.
💡 **Cómo utilizarlo (panel a la derecha)**
**Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.
**Ejemplos:** seleccione entre preguntas de ejemplo seleccionadas de diferentes categorías.
**Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.
⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsabilidad».
⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios.
"""
# Application layout and event wiring.
# Changes relative to the previous version:
#   * the example-category dropdown is now connected to change_sample_questions,
#     so picking a category actually switches the visible example group;
#   * showing the feedback prompt for a new answer also hides the stale
#     "thanks" message left over from the previous answer.
# NOTE(review): indentation was reconstructed; the last three tabs are assumed
# to be top-level siblings of the main chat tab - confirm against the deployed app.
with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
    # Main Chat Interface
    with gr.Tab("EUDR Bot"):
        with gr.Row():
            # Left column - Chat interface (2/3 width)
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    value=[(None, init_prompt)],
                    show_copy_button=True,
                    show_label=False,
                    layout="panel",
                    avatar_images=(None, "chatbot_icon_2.png"),
                    height="auto",
                )
                # Feedback UI
                with gr.Column():
                    with gr.Row(visible=False) as feedback_row:
                        gr.Markdown("¿Te ha sido útil esta respuesta?")
                        with gr.Row():
                            okay_btn = gr.Button("👍 De acuerdo", size="sm")
                            not_okay_btn = gr.Button("👎 No según lo esperado", size="sm")
                    feedback_thanks = gr.Markdown("Gracias por los comentarios.", visible=False)
                # Input textbox
                with gr.Row():
                    textbox = gr.Textbox(
                        placeholder="¡Pregúntame cualquier cosa sobre el cumplimiento de la normativa EUDR o sube tu GeoJSON para su análisis!",
                        show_label=False,
                        scale=7,
                        lines=1,
                        interactive=True,
                    )
            # Right column - Controls and tabs (1/3 width)
            with gr.Column(scale=1, variant="panel"):
                with gr.Tabs() as tabs:
                    # Data Sources Tab
                    with gr.Tab("Modo de uso", id=2):
                        search_method = gr.Radio(
                            choices=["Hablar con documentos", "Subir GeoJson"],
                            label="Elija una fuente de datos",
                            info="Sube un GeoJSON para su análisis o selecciona informes EUDR específicos de cada país.",
                            value="Hablar con documentos",
                        )
                        # GeoJSON Upload Section
                        with gr.Group(visible=False) as geojson_section:
                            uploaded_file = gr.File(
                                label="Subir GeoJson",
                                file_types=[".geojson", ".json"],
                                file_count="single",
                            )
                            upload_status = gr.Markdown("", visible=False)
                            # Results table for WHISP API response
                            results_table = gr.DataFrame(
                                label="Resultados del análisis",
                                visible=False,
                                interactive=False,
                                wrap=True,
                                elem_classes="dataframe",
                            )
                        # Talk to Reports Section
                        with gr.Group(visible=True) as reports_section:
                            dropdown_country = gr.Dropdown(
                                ["Ecuador", "Guatemala"],
                                label="Selecciona país",
                                value=None,
                                interactive=True,
                            )
                    # Examples Tab
                    with gr.Tab("Ejemplos", id=0):
                        # Hidden textbox that receives the clicked example; its
                        # change event starts the chat chain below.
                        examples_hidden = gr.Textbox(visible=False)
                        first_key = list(SAMPLE_QUESTIONS.keys())[0]
                        dropdown_samples = gr.Dropdown(
                            list(SAMPLE_QUESTIONS.keys()),
                            value=first_key,
                            interactive=True,
                            show_label=True,
                            label="Seleccione un ejemplo de pregunta.",
                        )
                        # One example group per category; only the first starts visible.
                        sample_groups = []
                        for i, questions in enumerate(SAMPLE_QUESTIONS.values()):
                            with gr.Row(visible=(i == 0)) as group_examples:
                                gr.Examples(
                                    questions,
                                    [examples_hidden],
                                    examples_per_page=8,
                                    run_on_click=False,
                                )
                            sample_groups.append(group_examples)
                    # Sources Tab (id=1 is the tab start_chat switches to)
                    with gr.Tab("Referencia", id=1, elem_id="sources-textbox"):
                        sources_textbox = gr.HTML(
                            show_label=False,
                            value="Los documentos originales aparecerán aquí después de que haga una pregunta...",
                        )
    # Guidelines Tab
    with gr.Tab("Directrices"):
        gr.Markdown("""
#### Welcome to EUDR Q&A!
This AI-powered assistant helps you understand EU Deforestation Regulation compliance and analyze geographic data.
## 💬 How to Ask Effective Questions
| ❌ Less Effective | ✅ More Effective |
|------------------|-------------------|
| "What is deforestation?" | "What are the main deforestation hotspots in Ecuador?" |
| "Tell me about compliance" | "What EUDR requirements apply to coffee imports from Guatemala?" |
| "Show me data" | "What is the deforestation rate in the uploaded region?" |
## 🔍 Using Data Sources
**Upload GeoJSON:** Upload your geographic data files for automatic analysis via WHISP API
**Talk to Reports:** Select Ecuador or Guatemala for country-specific EUDR analysis
## ⭐ Best Practices
- Be specific about regions, commodities, or time periods
- Ask one question at a time for clearer answers
- Use follow-up questions to explore topics deeper
- Provide context when possible
""")
    # About Tab
    with gr.Tab("Información"):
        gr.Markdown("""
## About EUDR Q&A
The **EU Deforestation Regulation (EUDR)** requires companies to ensure that specific commodities
placed on the EU market are deforestation-free and legally produced.
This AI-powered tool helps stakeholders:
- Understand EUDR compliance requirements
- Analyze geographic deforestation data using WHISP API
- Assess supply chain risks
- Navigate complex regulatory landscapes
**Developed by GIZ** to enhance accessibility and understanding of EUDR requirements
through advanced AI and geographic data processing capabilities.
### Key Features:
- Automatic analysis of uploaded GeoJSON files via WHISP API
- Country-specific EUDR compliance guidance
- Real-time question answering with source citations
- User-friendly interface for complex regulatory information
""")
    # Disclaimer Tab
    with gr.Tab("Exención de responsabilidad"):
        gr.Markdown("""
## Important Disclaimers
⚠️ **Scope & Limitations:**
- This tool is designed for EUDR compliance assistance and geographic data analysis
- Responses should not be considered official legal or compliance advice
- Always consult qualified professionals for official compliance decisions
⚠️ **Data & Privacy:**
- Uploaded GeoJSON files are processed via external WHISP API for analysis
- We collect usage statistics to improve the tool
- Files are processed temporarily and not permanently stored
⚠️ **AI Limitations:**
- Responses are AI-generated and may contain inaccuracies
- The tool is a prototype under continuous development
- Always verify important information with authoritative sources
**Data Collection:** We collect questions, answers, feedback, and anonymized usage statistics
to improve tool performance based on legitimate interest in service enhancement.By using this chatbot, you agree to these terms and acknowledge that you are solely responsible for any reliance on or actions taken based on its responses.
**Technical Information:** User can read more about the technical information about the tool in [**Readme**](https://huggingface.co/spaces/GIZ/Asistente_EUDR/blob/main/README.md) of this tool.
**This is just a prototype and being tested and worked upon, so its not perfect and may sometimes give irrelevant answers**. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.
""")

    # Event Handlers

    def show_feedback_prompt():
        """Show the feedback prompt for a new answer and hide the old thanks note."""
        return gr.update(visible=True), gr.update(visible=False)

    def log_feedback(feedback, history):
        """Log feedback for the last chat turn, then swap the prompt for the thanks note."""
        # Get the last interaction from chatbot history
        if history:
            last_query, last_response = history[-1]
            chat_logger.log(
                query=last_query,
                answer=last_response,
                retrieved_content=[],  # We don't have access to the original retrieved content here
                feedback=feedback,
            )
        return (gr.update(visible=False), gr.update(visible=True))

    # Toggle search method
    search_method.change(
        fn=toggle_search_method,
        inputs=[search_method],
        outputs=[geojson_section, reports_section, dropdown_country],
    )

    # Switch the visible example group when another category is selected.
    # This handler was missing, leaving change_sample_questions unused.
    dropdown_samples.change(
        fn=change_sample_questions,
        inputs=[dropdown_samples],
        outputs=sample_groups,
    )

    # File upload - automatically analyze and display in chat
    uploaded_file.change(
        fn=auto_analyze_file,
        inputs=[uploaded_file, chatbot],
        outputs=[chatbot, sources_textbox],
        queue=False,
    )

    # Chat functionality: lock input -> stream answer -> ask for feedback -> unlock input
    textbox.submit(
        start_chat,
        [textbox, chatbot],
        [textbox, tabs, chatbot],
        queue=False,
    ).then(
        chat_response,
        [textbox, chatbot, search_method, dropdown_country, uploaded_file],
        [chatbot, sources_textbox],
    ).then(
        show_feedback_prompt,
        outputs=[feedback_row, feedback_thanks],
    ).then(
        finish_chat,
        outputs=[textbox],
    )

    # Examples functionality: same chain, fed from the hidden examples textbox
    examples_hidden.change(
        start_chat,
        [examples_hidden, chatbot],
        [textbox, tabs, chatbot],
        queue=False,
    ).then(
        chat_response,
        [examples_hidden, chatbot, search_method, dropdown_country, uploaded_file],
        [chatbot, sources_textbox],
    ).then(
        show_feedback_prompt,
        outputs=[feedback_row, feedback_thanks],
    ).then(
        finish_chat,
        outputs=[textbox],
    )

    # Feedback buttons
    okay_btn.click(
        lambda history: log_feedback("positive", history),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks],
    )
    not_okay_btn.click(
        lambda history: log_feedback("negative", history),
        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()