Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
move whisp functions
Browse files- app.py +1 -169
- utils/__pycache__/whisp_api.cpython-310.pyc +0 -0
- utils/whisp_api.py +174 -0
app.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
|
|
4 |
import asyncio
|
5 |
from uuid import uuid4
|
6 |
from gradio_client import Client, handle_file
|
|
|
7 |
|
8 |
# Sample questions for examples
|
9 |
SAMPLE_QUESTIONS = {
|
@@ -24,176 +25,7 @@ SAMPLE_QUESTIONS = {
|
|
24 |
]
|
25 |
}
|
26 |
|
27 |
-
def get_value(df, colname):
    """Look up ``colname`` in a WhispAPI-style Column/Value dataframe.

    Returns the ``Value`` entry of the first row whose ``Column`` entry equals
    ``colname``. When the dataframe lacks either of those two columns, or no
    row matches, the string "Not available" is returned instead.
    """
    # Without both columns there is nothing to look up.
    if "Column" not in df.columns or "Value" not in df.columns:
        return "Not available"

    hits = df.loc[df["Column"] == colname, "Value"]
    return "Not available" if hits.empty else hits.values[0]
|
34 |
-
|
35 |
-
def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable Markdown text for end-users.

    Reads the ``Country``, ``Admin_Level_1``, ``Area``, ``risk_level``,
    ``risk_pcrop``, ``risk_acrop``, ``risk_timber`` and ``TMF_def_after_2020``
    entries from ``df`` through ``get_value`` and renders them as a report.

    Args:
        df: Dataframe handed to ``get_value`` for every lookup.

    Returns:
        The Markdown report. If anything unexpected is raised while building
        it, a Markdown error message containing the exception text is
        returned instead of propagating the exception.
    """
    try:
        # Country code mapping for better display
        country_codes = {
            'HND': 'Honduras',
            'GTM': 'Guatemala',
            'ECU': 'Ecuador',
            'COL': 'Colombia',
            'PER': 'Peru',
            'BRA': 'Brazil',
            'BOL': 'Bolivia',
            'CRI': 'Costa Rica',
            'PAN': 'Panama',
            'NIC': 'Nicaragua',
        }

        risk_labels = {
            "low": "🟢 **Low Risk**",
            "medium": "🟡 **Medium Risk**",
            "high": "🟠 **High Risk**",
            "very high": "🔴 **Very High Risk**",
            "more_info_needed": "📊 **Assessment Pending** *(More data needed)*",
        }

        def is_missing(value):
            """Return True for None, NaN, blank text and the 'Not available' sentinel."""
            if value is None:
                return True
            if isinstance(value, str):
                return value.strip().lower() in ("", "not available")
            # NaN is the only float that is unequal to itself.
            return isinstance(value, float) and bool(value != value)

        def to_number(value):
            """Convert ``value`` to a float, or return None if it is not a usable number."""
            if is_missing(value):
                return None
            try:
                number = float(value)
            except (TypeError, ValueError):
                return None
            return None if number != number else number

        def format_risk(risk_val):
            """Render a raw risk value as an emoji-tagged Markdown label."""
            if is_missing(risk_val):
                return "🔍 **Not Available** *(Analysis pending)*"
            if not isinstance(risk_val, str):
                return str(risk_val)
            fallback = f"ℹ️ **{risk_val.title()}**"
            return risk_labels.get(risk_val.lower().strip(), fallback)

        def is_high_risk(risk_val):
            """Return True when the raw risk value is 'high' or 'very high'."""
            return (
                isinstance(risk_val, str)
                and risk_val.lower().strip() in ("high", "very high")
            )

        def format_deforestation(def_val):
            """Render the deforested area (hectares) as a Markdown label."""
            # A numeric 0 is real data ("no deforestation"), so only true
            # gaps (None / NaN / blank / sentinel) count as missing here.
            if is_missing(def_val):
                return "🔍 **No Data Available**"
            def_num = to_number(def_val)
            if def_num is None:
                return f"ℹ️ **{def_val}**"
            if def_num == 0:
                return "✅ **No Recent Deforestation Detected**"
            if def_num < 0.1:
                return f"⚠️ **{def_num:.3f} hectares detected**"
            return f"⚠️ **{def_num:.2f} hectares detected**"

        def get_compliance_status(def_after_2020):
            """Derive the EUDR compliance label from the deforested area."""
            def_num = to_number(def_after_2020)
            if def_num == 0:
                return "✅ **COMPLIANT** - No recent deforestation detected"
            if def_num is not None and def_num > 0:
                return "⚠️ **REQUIRES ATTENTION** - Recent deforestation detected"
            # Missing, non-numeric or negative input: never return None, the
            # report below relies on this being a string.
            return "🔍 **ASSESSMENT NEEDED** - Insufficient data for compliance determination"

        country_raw = get_value(df, "Country")
        if is_missing(country_raw):
            country = "Not available"
        else:
            country = country_codes.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_raw = get_value(df, "Area")

        # Format area with proper rounding and units
        area_num = to_number(area_raw)
        if area_num is None:
            area_text = "Not available" if is_missing(area_raw) else str(area_raw)
        elif area_num < 1:
            area_text = f"{area_num:.3f} hectares"
        elif area_num < 100:
            area_text = f"{area_num:.2f} hectares"
        else:
            area_text = f"{area_num:,.1f} hectares"

        risk_level = get_value(df, "risk_level")
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")
        def_after_2020_raw = get_value(df, "TMF_def_after_2020")

        deforestation_formatted = format_deforestation(def_after_2020_raw)
        compliance_status = get_compliance_status(def_after_2020_raw)

        # Text after the " - " separator of the compliance label.
        _, separator, detail = compliance_status.partition(' - ')
        supplier_note = detail if separator else 'Review compliance requirements carefully'

        # Name the activities whose risk is high; the raw risk strings carry
        # no activity name themselves, so the labels are supplied here.
        activities = (
            ("Permanent Crops", risk_pcrop),
            ("Annual Crops", risk_acrop),
            ("Timber Extraction", risk_timber),
        )
        high_risk = [label for label, value in activities if is_high_risk(value)]
        if high_risk:
            risk_focus = f"Focus on {', '.join(high_risk)}"
        else:
            risk_focus = "No high-risk activities detected"

        if "ATTENTION" in compliance_status:
            next_steps = "Conduct additional due diligence if recent deforestation is detected"
        else:
            next_steps = "Continue monitoring and maintain documentation"

        output = f"""🌍 **Geographic Analysis Results**

📍 **Location Details**
- **Country**: {country}
- **Administrative Region**: {admin_level}
- **Total Area**: {area_text}

⚠️ **Deforestation Risk Assessment**
*Risk levels are based on historical patterns, environmental factors, and land use data*

- **Overall Risk**: {format_risk(risk_level)}
- **Permanent Crops**: {format_risk(risk_pcrop)}
*Coffee, cocoa, palm oil, fruit trees*
- **Annual Crops**: {format_risk(risk_acrop)}
*Soy, corn, rice, vegetables*
- **Timber Extraction**: {format_risk(risk_timber)}
*Logging and wood harvesting*

🌳 **EUDR Compliance Analysis**
*Based on Tropical Moist Forest satellite monitoring*

**Recent Deforestation (2020-Present):** {deforestation_formatted}

**EUDR Compliance Status:** {compliance_status}

---
💡 **Key Insights**
\t **For Suppliers**: {supplier_note} \n
\t **Risk Factors**: {risk_focus} \n
\t **Next Steps**: {next_steps} \n
"""
        return output
    except Exception as e:
        # Boundary handler: the caller displays whatever string is returned.
        return f"❌ **Analysis Error**\n\nUnable to process the geographic data: {str(e)}\n\n📋 **Troubleshooting:**\n- Verify your GeoJSON file format\n- Check file size (should be < 10MB)\n- Ensure coordinates are valid\n\nPlease try uploading again or contact support."
|
158 |
|
159 |
-
def handle_geojson_upload(file):
    """Send an uploaded GeoJSON file to the WHISP API and build the UI response.

    Returns a 3-tuple of (status text, update for the upload-status
    component, update for the results table). The results table is hidden in
    every case; the status component is shown unless no file was supplied.
    """
    # Nothing uploaded: empty text, both components hidden.
    if file is None:
        return (
            "",
            gr.update(visible=False),  # upload_status
            gr.update(visible=False),  # results_table
        )

    try:
        whisp_client = Client("https://giz-chatfed-whisp.hf.space/")
        response = whisp_client.predict(
            file=handle_file(file.name),
            api_name="/get_statistics"
        )

        # The response carries the rows under 'data' and column names under 'headers'.
        stats_df = pd.DataFrame(response['data'], columns=response['headers'])

        return (
            format_whisp_statistics(stats_df),  # readable statistics for chat
            gr.update(visible=True),            # upload_status
            gr.update(visible=False),           # results_table
        )
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return (
            f"❌ Error processing GeoJSON file: {str(e)}",
            gr.update(visible=True),   # upload_status
            gr.update(visible=False),  # results_table
        )
|
197 |
|
198 |
def retrieve_paragraphs(query):
|
199 |
"""Connect to retriever and retrieve paragraphs"""
|
|
|
4 |
import asyncio
|
5 |
from uuid import uuid4
|
6 |
from gradio_client import Client, handle_file
|
7 |
+
from utils.whisp_api import handle_geojson_upload
|
8 |
|
9 |
# Sample questions for examples
|
10 |
SAMPLE_QUESTIONS = {
|
|
|
25 |
]
|
26 |
}
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def retrieve_paragraphs(query):
|
31 |
"""Connect to retriever and retrieve paragraphs"""
|
utils/__pycache__/whisp_api.cpython-310.pyc
ADDED
Binary file (5.5 kB). View file
|
|
utils/whisp_api.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
def get_value(df, colname):
    """Look up ``colname`` in a WhispAPI-style Column/Value dataframe.

    Returns the ``Value`` entry of the first row whose ``Column`` entry equals
    ``colname``. When the dataframe lacks either of those two columns, or no
    row matches, the string "Not available" is returned instead.
    """
    # Without both columns there is nothing to look up.
    if "Column" not in df.columns or "Value" not in df.columns:
        return "Not available"

    hits = df.loc[df["Column"] == colname, "Value"]
    return "Not available" if hits.empty else hits.values[0]
|
12 |
+
|
13 |
+
def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable Markdown text for end-users.

    Reads the ``Country``, ``Admin_Level_1``, ``Area``, ``risk_level``,
    ``risk_pcrop``, ``risk_acrop``, ``risk_timber`` and ``TMF_def_after_2020``
    entries from ``df`` through ``get_value`` and renders them as a report.

    Args:
        df: Dataframe handed to ``get_value`` for every lookup.

    Returns:
        The Markdown report. If anything unexpected is raised while building
        it, a Markdown error message containing the exception text is
        returned instead of propagating the exception.
    """
    try:
        # Country code mapping for better display
        country_codes = {
            'HND': 'Honduras',
            'GTM': 'Guatemala',
            'ECU': 'Ecuador',
            'COL': 'Colombia',
            'PER': 'Peru',
            'BRA': 'Brazil',
            'BOL': 'Bolivia',
            'CRI': 'Costa Rica',
            'PAN': 'Panama',
            'NIC': 'Nicaragua',
        }

        risk_labels = {
            "low": "🟢 **Low Risk**",
            "medium": "🟡 **Medium Risk**",
            "high": "🟠 **High Risk**",
            "very high": "🔴 **Very High Risk**",
            "more_info_needed": "📊 **Assessment Pending** *(More data needed)*",
        }

        def is_missing(value):
            """Return True for None, NaN, blank text and the 'Not available' sentinel."""
            if value is None:
                return True
            if isinstance(value, str):
                return value.strip().lower() in ("", "not available")
            # NaN is the only float that is unequal to itself.
            return isinstance(value, float) and bool(value != value)

        def to_number(value):
            """Convert ``value`` to a float, or return None if it is not a usable number."""
            if is_missing(value):
                return None
            try:
                number = float(value)
            except (TypeError, ValueError):
                return None
            return None if number != number else number

        def format_risk(risk_val):
            """Render a raw risk value as an emoji-tagged Markdown label."""
            if is_missing(risk_val):
                return "🔍 **Not Available** *(Analysis pending)*"
            if not isinstance(risk_val, str):
                return str(risk_val)
            fallback = f"ℹ️ **{risk_val.title()}**"
            return risk_labels.get(risk_val.lower().strip(), fallback)

        def is_high_risk(risk_val):
            """Return True when the raw risk value is 'high' or 'very high'."""
            return (
                isinstance(risk_val, str)
                and risk_val.lower().strip() in ("high", "very high")
            )

        def format_deforestation(def_val):
            """Render the deforested area (hectares) as a Markdown label."""
            # A numeric 0 is real data ("no deforestation"), so only true
            # gaps (None / NaN / blank / sentinel) count as missing here.
            if is_missing(def_val):
                return "🔍 **No Data Available**"
            def_num = to_number(def_val)
            if def_num is None:
                return f"ℹ️ **{def_val}**"
            if def_num == 0:
                return "✅ **No Recent Deforestation Detected**"
            if def_num < 0.1:
                return f"⚠️ **{def_num:.3f} hectares detected**"
            return f"⚠️ **{def_num:.2f} hectares detected**"

        def get_compliance_status(def_after_2020):
            """Derive the EUDR compliance label from the deforested area."""
            def_num = to_number(def_after_2020)
            if def_num == 0:
                return "✅ **COMPLIANT** - No recent deforestation detected"
            if def_num is not None and def_num > 0:
                return "⚠️ **REQUIRES ATTENTION** - Recent deforestation detected"
            # Missing, non-numeric or negative input: never return None, the
            # report below relies on this being a string.
            return "🔍 **ASSESSMENT NEEDED** - Insufficient data for compliance determination"

        country_raw = get_value(df, "Country")
        if is_missing(country_raw):
            country = "Not available"
        else:
            country = country_codes.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_raw = get_value(df, "Area")

        # Format area with proper rounding and units
        area_num = to_number(area_raw)
        if area_num is None:
            area_text = "Not available" if is_missing(area_raw) else str(area_raw)
        elif area_num < 1:
            area_text = f"{area_num:.3f} hectares"
        elif area_num < 100:
            area_text = f"{area_num:.2f} hectares"
        else:
            area_text = f"{area_num:,.1f} hectares"

        risk_level = get_value(df, "risk_level")
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")
        def_after_2020_raw = get_value(df, "TMF_def_after_2020")

        deforestation_formatted = format_deforestation(def_after_2020_raw)
        compliance_status = get_compliance_status(def_after_2020_raw)

        # Text after the " - " separator of the compliance label.
        _, separator, detail = compliance_status.partition(' - ')
        supplier_note = detail if separator else 'Review compliance requirements carefully'

        # Name the activities whose risk is high; the raw risk strings carry
        # no activity name themselves, so the labels are supplied here.
        activities = (
            ("Permanent Crops", risk_pcrop),
            ("Annual Crops", risk_acrop),
            ("Timber Extraction", risk_timber),
        )
        high_risk = [label for label, value in activities if is_high_risk(value)]
        if high_risk:
            risk_focus = f"Focus on {', '.join(high_risk)}"
        else:
            risk_focus = "No high-risk activities detected"

        if "ATTENTION" in compliance_status:
            next_steps = "Conduct additional due diligence if recent deforestation is detected"
        else:
            next_steps = "Continue monitoring and maintain documentation"

        output = f"""🌍 **Geographic Analysis Results**

📍 **Location Details**
- **Country**: {country}
- **Administrative Region**: {admin_level}
- **Total Area**: {area_text}

⚠️ **Deforestation Risk Assessment**
*Risk levels are based on historical patterns, environmental factors, and land use data*

- **Overall Risk**: {format_risk(risk_level)}
- **Permanent Crops**: {format_risk(risk_pcrop)}
*Coffee, cocoa, palm oil, fruit trees*
- **Annual Crops**: {format_risk(risk_acrop)}
*Soy, corn, rice, vegetables*
- **Timber Extraction**: {format_risk(risk_timber)}
*Logging and wood harvesting*

🌳 **EUDR Compliance Analysis**
*Based on Tropical Moist Forest satellite monitoring*

**Recent Deforestation (2020-Present):** {deforestation_formatted}

**EUDR Compliance Status:** {compliance_status}

---
💡 **Key Insights**
\t **For Suppliers**: {supplier_note} \n
\t **Risk Factors**: {risk_focus} \n
\t **Next Steps**: {next_steps} \n
"""
        return output
    except Exception as e:
        # Boundary handler: the caller displays whatever string is returned.
        return f"❌ **Analysis Error**\n\nUnable to process the geographic data: {str(e)}\n\n📋 **Troubleshooting:**\n- Verify your GeoJSON file format\n- Check file size (should be < 10MB)\n- Ensure coordinates are valid\n\nPlease try uploading again or contact support."
|
136 |
+
|
137 |
+
def handle_geojson_upload(file):
    """Send an uploaded GeoJSON file to the WHISP API and build the UI response.

    Returns a 3-tuple of (status text, update for the upload-status
    component, update for the results table). The results table is hidden in
    every case; the status component is shown unless no file was supplied.
    """
    # Nothing uploaded: empty text, both components hidden.
    if file is None:
        return (
            "",
            gr.update(visible=False),  # upload_status
            gr.update(visible=False),  # results_table
        )

    try:
        whisp_client = Client("https://giz-chatfed-whisp.hf.space/")
        response = whisp_client.predict(
            file=handle_file(file.name),
            api_name="/get_statistics"
        )

        # The response carries the rows under 'data' and column names under 'headers'.
        stats_df = pd.DataFrame(response['data'], columns=response['headers'])

        return (
            format_whisp_statistics(stats_df),  # readable statistics for chat
            gr.update(visible=True),            # upload_status
            gr.update(visible=False),           # results_table
        )
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return (
            f"❌ Error processing GeoJSON file: {str(e)}",
            gr.update(visible=True),   # upload_status
            gr.update(visible=False),  # results_table
        )
|