Asistente_EUDR

Running on CPU Upgrade

App Files Files Community

Asistente_EUDR / app.py

Romulan12

changed stats fetching

0b1d4d1 25 days ago

raw

history blame

22.4 kB

	import gradio as gr
	import time
	import pandas as pd
	import asyncio
	from uuid import uuid4
	from gradio_client import Client, handle_file

	# Example prompts offered in the "Examples" tab, grouped by category.
	# Insertion order matters: the first category is the one shown by default.
	SAMPLE_QUESTIONS = {
	    "Deforestation Analysis": [
	        "What are the main deforestation hotspots in Ecuador?",
	        "Show me deforestation trends in the uploaded area",
	        "What commodities are driving deforestation in Guatemala?",
	    ],
	    "EUDR Compliance": [
	        "What are the key EUDR requirements for coffee imports?",
	        "How do I prove due diligence for my supply chain?",
	        "What documentation is needed for EUDR compliance?",
	    ],
	    "Risk Assessment": [
	        "What is the deforestation risk level in this region?",
	        "How do I assess supply chain risks?",
	        "What are the compliance deadlines?",
	    ],
	}

	def _first_value(df, column, default="Not available"):
	    """Return the first-row value of ``column``, or ``default`` if it is unusable.

	    ``default`` is returned when the column is absent, the frame has no rows,
	    or the first-row value is a missing scalar (``None``/``NaN``).
	    """
	    if column not in df.columns or df.empty:
	        return default
	    value = df[column].iloc[0]
	    # A missing scalar would otherwise be rendered literally as "nan"/"None".
	    if pd.api.types.is_scalar(value) and pd.isna(value):
	        return default
	    return value


	def format_whisp_statistics(df):
	    """Format WhispAPI statistics into a standardized, readable text.

	    Only the first row of ``df`` is reported. Every indicator is optional:
	    a missing column, an empty frame or a missing value is rendered as
	    "Not available" instead of aborting the whole report.

	    Args:
	        df: DataFrame built from the WHISP API response.

	    Returns:
	        The formatted report, or an "Error formatting statistics: ..." message
	        if ``df`` cannot be read at all. This function never raises, because
	        its result is shown directly in the chat.
	    """
	    try:
	        country = _first_value(df, 'Country')
	        admin_level = _first_value(df, 'Admin_Level_1')

	        # Area is the only numeric field; anything that cannot be converted
	        # to a float is reported as unavailable rather than failing the report.
	        try:
	            area = round(float(_first_value(df, 'Area', None)), 2)
	        except (TypeError, ValueError):
	            area = "Not available"

	        risk_level = _first_value(df, 'risk_level')
	        risk_pcrop = _first_value(df, 'risk_pcrop')
	        risk_acrop = _first_value(df, 'risk_acrop')
	        risk_timber = _first_value(df, 'risk_timber')

	        # TMF_def_after_2020 is a risk category, not hectares
	        def_after_2020 = _first_value(df, 'TMF_def_after_2020')

	        report_lines = [
	            "📊 Analysis Results for Your Plot",
	            "",
	            "Plot Information:",
	            f"- Country: {country}",
	            f"- Administrative Region: {admin_level}",
	            f"- Total Area: {area} hectares",
	            "",
	            "Risk Assessment:",
	            f"- Overall Risk Level: {risk_level}",
	            f"- Permanent Crop Risk: {risk_pcrop}",
	            f"- Annual Crop Risk: {risk_acrop}",
	            f"- Timber Risk: {risk_timber}",
	            "",
	            "Deforestation Analysis:",
	            f"- Deforestation risk after 2020: {def_after_2020}",
	        ]
	        return "\n".join(report_lines) + "\n"
	    except Exception as e:
	        # UI boundary: surface the problem as text instead of crashing the chat.
	        return f"Error formatting statistics: {str(e)}"

	# def format_whisp_statistics(df):
	# """Format WhispAPI statistics into a standardized, readable text"""
	# try:
	# # Extract required indicators from API response
	# country = df['Country'].iloc[0]
	# admin_level = df['Admin_Level_1'].iloc[0]
	# area = round(df['Area'].iloc[0], 2)
	# risk_level = df['risk_level'].iloc[0] if 'risk_level' in df.columns else "Not available"
	# risk_pcrop = df['risk_pcrop'].iloc[0] if 'risk_pcrop' in df.columns else "Not available"
	# risk_acrop = df['risk_acrop'].iloc[0] if 'risk_acrop' in df.columns else "Not available"
	# risk_timber = df['risk_timber'].iloc[0] if 'risk_timber' in df.columns else "Not available"
	# deforestation_after_2020 = df['TMF_def_after_2020'].iloc[0] if 'TMF_def_after_2020' in df.columns else 0

	# # Format the text output
	# output = f"""📊 Analysis Results for Your Plot

	# Plot Information:
	# - Country: {country}
	# - Administrative Region: {admin_level}
	# - Total Area: {area} hectares

	# Risk Assessment:
	# - Overall Risk Level: {risk_level}
	# - Permanent Crop Risk: {risk_pcrop}
	# - Annual Crop Risk: {risk_acrop}
	# - Timber Risk: {risk_timber}

	# Deforestation Analysis:
	# - Deforestation after 2020: {round(deforestation_after_2020, 2)} hectares
	# """
	# return output
	# except Exception as e:
	# return f"Error formatting statistics: {str(e)}"

	def handle_geojson_upload(file):
	    """Send an uploaded GeoJSON file to the WHISP API and format the statistics.

	    Args:
	        file: The value of the upload component: ``None`` when nothing is
	            uploaded, otherwise a path string or an object whose ``name``
	            attribute holds the path.

	    Returns:
	        A 3-tuple ``(text, status_update, table_update)``: the formatted
	        statistics (or an error message, or "" when there is no file), a
	        visibility update for the status area, and an update that always
	        keeps the results table hidden.
	    """
	    if file is None:
	        return (
	            "",
	            gr.update(visible=False),  # upload_status
	            gr.update(visible=False)  # results_table
	        )

	    try:
	        # Accept both a plain path string and a file object exposing `.name`.
	        file_path = getattr(file, "name", file)

	        # Initialize WHISP API client
	        client = Client("https://giz-chatfed-whisp.hf.space/")

	        # Call the API with the uploaded file
	        result = client.predict(
	            file=handle_file(file_path),
	            api_name="/get_statistics"
	        )

	        # Convert result to DataFrame
	        df = pd.DataFrame(result['data'], columns=result['headers'])

	        # Format statistics into readable text
	        formatted_stats = format_whisp_statistics(df)
	    except Exception as e:
	        # UI boundary: report the failure in the status area instead of raising.
	        return (
	            f"❌ Error processing GeoJSON file: {str(e)}",
	            gr.update(visible=True),  # upload_status
	            gr.update(visible=False)  # results_table
	        )

	    return (
	        formatted_stats,  # Keep formatted statistics for chat
	        gr.update(visible=True),  # Keep status visible
	        gr.update(visible=False)  # Always hide results table
	    )

	def retrieve_paragraphs(query):
	    """Query the EUDR retriever service for paragraphs matching ``query``.

	    Args:
	        query: The user's question, forwarded unchanged; all filters are
	            sent as empty strings.

	    Returns:
	        A 3-tuple ``(payload, status_update, table_update)``. ``payload`` is
	        whatever the ``/retrieve`` endpoint returned, or an
	        "Error retrieving paragraphs: ..." message if the call failed.
	    """
	    try:
	        client = Client("https://giz-eudr-retriever.hf.space/")
	        result = client.predict(
	            query=query,
	            reports_filter="",
	            sources_filter="",
	            subtype_filter="",
	            year_filter="",
	            api_name="/retrieve"
	        )
	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising.
	        error_msg = f"Error retrieving paragraphs: {str(e)}"
	        return (
	            error_msg,
	            gr.update(visible=True),  # upload_status
	            gr.update(visible=False)  # results_table
	        )

	    # Previously this returned the undefined name `results`, so every call
	    # raised NameError and fell into the error branch above.
	    return (
	        result,
	        gr.update(visible=True),  # Keep status visible
	        gr.update(visible=False)  # Always hide results table
	    )

	def start_chat(query, history):
	    """Open a new chat turn for ``query``.

	    Returns, in order: an update that makes the input non-interactive, an
	    update selecting tab id 1, and the history extended with a
	    ``(query, None)`` entry whose answer is still pending.
	    """
	    pending_turn = (query, None)
	    extended_history = history + [pending_turn]
	    lock_input = gr.update(interactive=False)
	    select_tab = gr.update(selected=1)
	    return lock_input, select_tab, extended_history

	def finish_chat():
	    """Return an update that clears the input box and makes it interactive again."""
	    reset_input = gr.update(value="", interactive=True)
	    return reset_input

	async def chat_response(query, history, method, country, uploaded_file):
	    """Generate a chat response and stream it word by word.

	    Args:
	        query: The user's question.
	        history: Chat history; its last entry is overwritten in place with
	            ``(query, partial_answer)`` as the answer grows.
	        method: "Upload GeoJSON" or anything else (treated as "Talk to Reports").
	        country: Selected country; required when not using GeoJSON upload.
	        uploaded_file: The uploaded GeoJSON file, or ``None``.

	    Yields:
	        ``(history, sources_text)`` pairs. A validation failure yields a
	        single warning with empty sources and then stops.
	    """
	    # Validate inputs based on method
	    if method == "Upload GeoJSON":
	        if uploaded_file is None:
	            warning_message = "⚠️ No GeoJSON file uploaded. Please upload a GeoJSON file first."
	            history[-1] = (query, warning_message)
	            yield history, ""
	            return
	    elif not country:  # "Talk to Reports"
	        warning_message = "⚠️ No country selected. Please select a country to analyze reports."
	        history[-1] = (query, warning_message)
	        yield history, ""
	        return

	    if method == "Upload GeoJSON" and uploaded_file:
	        # First element of the upload result is the formatted statistics text.
	        try:
	            response = handle_geojson_upload(uploaded_file)[0]
	        except Exception as e:
	            response = f"Error processing file: {str(e)}"
	    else:
	        # Talk to reports: first element of the retriever result is the payload.
	        try:
	            response = retrieve_paragraphs(query)[0]
	        except Exception as e:
	            response = f"Error retrieving information: {str(e)}"

	    # The retriever payload is passed through as-is and may not be a string.
	    words = str(response).split()

	    # Simulate streaming by growing the answer one word at a time. The prefix
	    # is accumulated directly: looking each word up with words.index() would
	    # return the first occurrence and make the text jump back on repeats.
	    partial_answer = ""
	    for word in words:
	        partial_answer = f"{partial_answer} {word}" if partial_answer else word
	        history[-1] = (query, partial_answer)
	        yield history, "Sources: Sample source documents would appear here..."
	        await asyncio.sleep(0.05)

	def toggle_search_method(method):
	    """Switch the visible input section to match the chosen data source.

	    Returns updates for, in order: the GeoJSON section, the reports section
	    and the country dropdown. Choosing "Upload GeoJSON" also resets the
	    dropdown value to ``None``; any other choice leaves the dropdown as is.
	    """
	    geojson_selected = method == "Upload GeoJSON"
	    if geojson_selected:
	        country_update = gr.update(value=None)
	    else:  # "Talk to Reports"
	        country_update = gr.update()
	    return (
	        gr.update(visible=geojson_selected),  # geojson_section
	        gr.update(visible=not geojson_selected),  # reports_section
	        country_update,  # dropdown_country
	    )

	def change_sample_questions(key):
	    """Show only the example group that belongs to the selected category.

	    Returns one visibility update per category, in ``SAMPLE_QUESTIONS`` order.
	    Raises ``ValueError`` if ``key`` is not a known category.
	    """
	    categories = list(SAMPLE_QUESTIONS)
	    selected = categories.index(key)
	    return [
	        gr.update(visible=(position == selected))
	        for position, _ in enumerate(categories)
	    ]

	# Visual theme for the app: green/blue hues, Poppins with generic
	# sans-serif fallbacks, and the small text-size preset.
	theme = gr.themes.Base(
	    primary_hue="green",
	    secondary_hue="blue",
	    text_size=gr.themes.utils.sizes.text_sm,
	    font=[
	        gr.themes.GoogleFont("Poppins"),
	        "ui-sans-serif",
	        "system-ui",
	        "sans-serif",
	    ],
	)

	# Custom CSS for DataFrame styling.
	# Passed to gr.Blocks via css=custom_css. The `.dataframe` rules match the
	# elem_classes="dataframe" set on results_table; the -small/-medium/-large
	# variants are alternatives that no component in this file currently uses.
	custom_css = """
	/* DataFrame text sizing - Modify these values to change text size */
	.dataframe table {
	font-size: 12px !important; /* Change this value (e.g., 10px, 14px, 16px) */
	}

	.dataframe th {
	font-size: 13px !important; /* Header text size */
	font-weight: 600 !important;
	}

	.dataframe td {
	font-size: 12px !important; /* Cell text size */
	padding: 8px !important; /* Cell padding */
	}

	/* Alternative size classes - change elem_classes="dataframe-small" in DataFrame component */
	.dataframe-small table { font-size: 10px !important; }
	.dataframe-small th { font-size: 11px !important; }
	.dataframe-small td { font-size: 10px !important; }

	.dataframe-medium table { font-size: 14px !important; }
	.dataframe-medium th { font-size: 15px !important; }
	.dataframe-medium td { font-size: 14px !important; }

	.dataframe-large table { font-size: 16px !important; }
	.dataframe-large th { font-size: 17px !important; }
	.dataframe-large td { font-size: 16px !important; }
	"""

	# Greeting used as the chatbot's initial assistant-side message
	# (the Chatbot component is created with value=[(None, init_prompt)]).
	init_prompt = """
	Hello, I am EUDR Q&A, an AI-powered conversational assistant designed to help you understand EU Deforestation Regulation compliance and analysis. I will answer your questions by using EUDR reports and uploaded GeoJSON files.

	💡 How to use (tabs on right)
	- Data Sources: Choose to either upload a GeoJSON file for analysis or talk to EUDR reports filtered by country.
	- Examples: Select from curated example questions across different categories.
	- Sources: View the content sources used to generate answers for fact-checking.

	⚠️ For limitations and data collection information, please check the Disclaimer tab.
	"""

	# Build the UI. Components created here are referenced by the event
	# handlers wired up further down.
	# NOTE(review): the nesting of the `with` blocks is not visible in this
	# flattened listing; restore the indentation from the original file.
	with gr.Blocks(title="EUDR Q&A", theme=theme, css=custom_css) as demo:

	# Main Chat Interface
	with gr.Tab("EUDR Q&A"):
	with gr.Row():
	# Left column - Chat interface (2/3 width)
	with gr.Column(scale=2):
	# The conversation starts with a single assistant-side message (init_prompt).
	# History entries are (user, assistant) tuples; start_chat appends (query, None).
	chatbot = gr.Chatbot(
	value=[(None, init_prompt)],
	show_copy_button=True,
	show_label=False,
	layout="panel",
	avatar_images=(None, "🌳"),
	height=500
	)

	# Feedback UI
	# feedback_row starts hidden; the chat pipeline reveals it after chat_response,
	# and either feedback button hides it again and shows feedback_thanks.
	with gr.Column():
	with gr.Row(visible=False) as feedback_row:
	gr.Markdown("Was this response helpful?")
	with gr.Row():
	okay_btn = gr.Button("👍 Okay", size="sm")
	not_okay_btn = gr.Button("👎 Not to expectations", size="sm")
	feedback_thanks = gr.Markdown("Thanks for the feedback!", visible=False)

	# Input textbox
	# Submitting it starts the chat pipeline; start_chat makes it non-interactive
	# and finish_chat clears and re-enables it.
	with gr.Row():
	textbox = gr.Textbox(
	placeholder="Ask me anything about EUDR compliance or upload your GeoJSON for analysis!",
	show_label=False,
	scale=7,
	lines=1,
	interactive=True
	)

	# Right column - Controls and tabs (1/3 width)
	with gr.Column(scale=1, variant="panel"):
	# The tab ids matter: start_chat returns gr.update(selected=1) for `tabs`,
	# which corresponds to the "Sources" tab (id=1) declared below.
	with gr.Tabs() as tabs:

	# Data Sources Tab
	with gr.Tab("Data Sources", id=2):
	# The selected value is passed to chat_response as `method` and to
	# toggle_search_method, which shows the matching section below.
	search_method = gr.Radio(
	choices=["Upload GeoJSON", "Talk to Reports"],
	label="Choose data source",
	info="Upload a GeoJSON file for analysis or select country-specific EUDR reports",
	value="Upload GeoJSON",
	)

	# GeoJSON Upload Section
	# Visible by default, matching the Radio's initial value.
	with gr.Group(visible=True) as geojson_section:
	uploaded_file = gr.File(
	label="Upload GeoJSON File",
	file_types=[".geojson", ".json"],
	file_count="single"
	)
	# Receives the text and visibility update returned by handle_geojson_upload.
	upload_status = gr.Markdown("", visible=False)

	# Results table for WHISP API response
	# Every return path of handle_geojson_upload keeps this table hidden.
	results_table = gr.DataFrame(
	label="Analysis Results",
	visible=False,
	interactive=False,
	wrap=True,
	elem_classes="dataframe"
	)

	# Talk to Reports Section
	# Hidden until "Talk to Reports" is selected.
	with gr.Group(visible=False) as reports_section:
	dropdown_country = gr.Dropdown(
	["Ecuador", "Guatemala"],
	label="Select Country",
	value=None,
	interactive=True,
	)

	# Examples Tab
	with gr.Tab("Examples", id=0):
	# Hidden textbox used as the input target of every gr.Examples below;
	# its change event starts a chat (see the event handlers).
	examples_hidden = gr.Textbox(visible=False)

	first_key = list(SAMPLE_QUESTIONS.keys())[0]
	dropdown_samples = gr.Dropdown(
	SAMPLE_QUESTIONS.keys(),
	value=first_key,
	interactive=True,
	show_label=True,
	label="Select a category of sample questions"
	)

	# Create example sections
	# One Row per category, in SAMPLE_QUESTIONS order; only the first starts
	# visible. change_sample_questions returns one update per Row in that order.
	sample_groups = []
	for i, (key, questions) in enumerate(SAMPLE_QUESTIONS.items()):
	examples_visible = True if i == 0 else False
	with gr.Row(visible=examples_visible) as group_examples:
	gr.Examples(
	questions,
	[examples_hidden],
	examples_per_page=8,
	run_on_click=False,
	)
	sample_groups.append(group_examples)

	# Sources Tab
	# chat_response's second yielded value is written to this component.
	with gr.Tab("Sources", id=1):
	sources_textbox = gr.HTML(
	show_label=False,
	value="Source documents will appear here after you ask a question..."
	)

	# Guidelines Tab
	# Static Markdown help text.
	# NOTE(review): the table pipes in the string below appear backslash-escaped
	# in this listing; confirm the table renders as intended.
	with gr.Tab("Guidelines"):
	gr.Markdown("""
	#### Welcome to EUDR Q&A!

	This AI-powered assistant helps you understand EU Deforestation Regulation compliance and analyze geographic data.

	## 💬 How to Ask Effective Questions

	\| ❌ Less Effective \| ✅ More Effective \|
	\|------------------\|-------------------\|
	\| "What is deforestation?" \| "What are the main deforestation hotspots in Ecuador?" \|
	\| "Tell me about compliance" \| "What EUDR requirements apply to coffee imports from Guatemala?" \|
	\| "Show me data" \| "What is the deforestation rate in the uploaded region?" \|

	## 🔍 Using Data Sources

	Upload GeoJSON: Upload your geographic data files for automatic analysis via WHISP API
	Talk to Reports: Select Ecuador or Guatemala for country-specific EUDR analysis

	## ⭐ Best Practices

	- Be specific about regions, commodities, or time periods
	- Ask one question at a time for clearer answers
	- Use follow-up questions to explore topics deeper
	- Provide context when possible
	""")

	# About Tab
	# Static Markdown describing the tool.
	with gr.Tab("About"):
	gr.Markdown("""
	## About EUDR Q&A

	The EU Deforestation Regulation (EUDR) requires companies to ensure that specific commodities
	placed on the EU market are deforestation-free and legally produced.

	This AI-powered tool helps stakeholders:
	- Understand EUDR compliance requirements
	- Analyze geographic deforestation data using WHISP API
	- Assess supply chain risks
	- Navigate complex regulatory landscapes

	Developed by GIZ to enhance accessibility and understanding of EUDR requirements
	through advanced AI and geographic data processing capabilities.

	### Key Features:
	- Automatic analysis of uploaded GeoJSON files via WHISP API
	- Country-specific EUDR compliance guidance
	- Real-time question answering with source citations
	- User-friendly interface for complex regulatory information
	""")

	# Disclaimer Tab
	# Static Markdown; the initial chatbot message points users to this tab.
	with gr.Tab("Disclaimer"):
	gr.Markdown("""
	## Important Disclaimers

	⚠️ Scope & Limitations:
	- This tool is designed for EUDR compliance assistance and geographic data analysis
	- Responses should not be considered official legal or compliance advice
	- Always consult qualified professionals for official compliance decisions

	⚠️ Data & Privacy:
	- Uploaded GeoJSON files are processed via external WHISP API for analysis
	- We collect usage statistics to improve the tool
	- Files are processed temporarily and not permanently stored

	⚠️ AI Limitations:
	- Responses are AI-generated and may contain inaccuracies
	- The tool is a prototype under continuous development
	- Always verify important information with authoritative sources

	Data Collection: We collect questions, answers, feedback, and anonymized usage statistics
	to improve tool performance based on legitimate interest in service enhancement.

	By using this tool, you acknowledge these limitations and agree to use responses responsibly.
	""")

	# Event Handlers
	# Each handler's return values map positionally onto its `outputs` list.

	# Toggle search method
	# Shows the section matching the chosen data source; toggle_search_method
	# also resets the country dropdown when "Upload GeoJSON" is chosen.
	search_method.change(
	fn=toggle_search_method,
	inputs=[search_method],
	outputs=[geojson_section, reports_section, dropdown_country]
	)

	# File upload - automatically process when file is uploaded
	# upload_status is listed twice on purpose: handle_geojson_upload returns the
	# text first and a visibility update second, both aimed at that component.
	# NOTE(review): confirm the Gradio version in use accepts a repeated output.
	uploaded_file.change(
	fn=handle_geojson_upload,
	inputs=[uploaded_file],
	outputs=[upload_status, upload_status, results_table]
	)

	# Chat functionality
	# Pipeline: start_chat (append the pending turn, disable the textbox, select
	# tab id 1) -> chat_response (streams into the chatbot and sources panel)
	# -> reveal feedback_row -> finish_chat (clear and re-enable the textbox).
	textbox.submit(
	start_chat,
	[textbox, chatbot],
	[textbox, tabs, chatbot],
	queue=False
	).then(
	chat_response,
	[textbox, chatbot, search_method, dropdown_country, uploaded_file],
	[chatbot, sources_textbox]
	).then(
	lambda: gr.update(visible=True),
	outputs=[feedback_row]
	).then(
	finish_chat,
	outputs=[textbox]
	)

	# Examples functionality
	# Same pipeline as above, but the query is read from examples_hidden instead
	# of the visible textbox.
	examples_hidden.change(
	start_chat,
	[examples_hidden, chatbot],
	[textbox, tabs, chatbot],
	queue=False
	).then(
	chat_response,
	[examples_hidden, chatbot, search_method, dropdown_country, uploaded_file],
	[chatbot, sources_textbox]
	).then(
	lambda: gr.update(visible=True),
	outputs=[feedback_row]
	).then(
	finish_chat,
	outputs=[textbox]
	)

	# Sample questions dropdown
	# sample_groups holds one Row per category, in the same order as the updates
	# returned by change_sample_questions.
	dropdown_samples.change(
	change_sample_questions,
	[dropdown_samples],
	sample_groups
	)

	# Feedback buttons
	# Both buttons do the same thing: hide the feedback prompt and show the
	# thank-you note. Nothing in these handlers records which button was clicked.
	okay_btn.click(
	lambda: (gr.update(visible=False), gr.update(visible=True)),
	outputs=[feedback_row, feedback_thanks]
	)

	not_okay_btn.click(
	lambda: (gr.update(visible=False), gr.update(visible=True)),
	outputs=[feedback_row, feedback_thanks]
	)

	# Start the Gradio server only when this file is run as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()