Spaces:
GIZ
/
Running on CPU Upgrade

leavoigt committed on
Commit
b1d15aa
·
1 Parent(s): 773f59c

move whisp functions

Browse files
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import asyncio
5
  from uuid import uuid4
6
  from gradio_client import Client, handle_file
 
7
 
8
  # Sample questions for examples
9
  SAMPLE_QUESTIONS = {
@@ -24,176 +25,7 @@ SAMPLE_QUESTIONS = {
24
  ]
25
  }
26
 
27
- def get_value(df, colname):
28
- """Fetch value from WhispAPI-style Column/Value dataframe"""
29
- if "Column" in df.columns and "Value" in df.columns:
30
- match = df.loc[df["Column"] == colname, "Value"]
31
- if not match.empty:
32
- return match.values[0]
33
- return "Not available"
34
-
35
- def format_whisp_statistics(df):
36
- """Format WhispAPI statistics into readable text for end-users"""
37
- try:
38
- # Country code mapping for better display
39
- country_codes = {
40
- 'HND': 'Honduras',
41
- 'GTM': 'Guatemala',
42
- 'ECU': 'Ecuador',
43
- 'COL': 'Colombia',
44
- 'PER': 'Peru',
45
- 'BRA': 'Brazil',
46
- 'BOL': 'Bolivia',
47
- 'CRI': 'Costa Rica',
48
- 'PAN': 'Panama',
49
- 'NIC': 'Nicaragua'
50
- }
51
-
52
- country_raw = get_value(df, "Country")
53
- country = country_codes.get(country_raw, country_raw)
54
- admin_level = get_value(df, "Admin_Level_1")
55
- area_raw = get_value(df, "Area")
56
-
57
- # Format area with proper rounding and units
58
- try:
59
- area_num = float(area_raw)
60
- if area_num < 1:
61
- area_text = f"{area_num:.3f} hectares"
62
- elif area_num < 100:
63
- area_text = f"{area_num:.2f} hectares"
64
- else:
65
- area_text = f"{area_num:,.1f} hectares"
66
- except:
67
- area_text = str(area_raw) if area_raw != "Not available" else "Not available"
68
-
69
- risk_level = get_value(df, "risk_level")
70
- risk_pcrop = get_value(df, "risk_pcrop")
71
- risk_acrop = get_value(df, "risk_acrop")
72
- risk_timber = get_value(df, "risk_timber")
73
- def_after_2020_raw = get_value(df, "TMF_def_after_2020")
74
-
75
- # Helper function to format risk levels with colors/emojis
76
- def format_risk(risk_val):
77
- if not risk_val or risk_val in ["Not available", "not available"]:
78
- return "🔍 **Not Available** *(Analysis pending)*"
79
- elif isinstance(risk_val, str):
80
- risk_lower = risk_val.lower().strip()
81
- if risk_lower == "low":
82
- return "🟢 **Low Risk**"
83
- elif risk_lower == "medium":
84
- return "🟡 **Medium Risk**"
85
- elif risk_lower == "high":
86
- return "🟠 **High Risk**"
87
- elif risk_lower == "very high":
88
- return "🔴 **Very High Risk**"
89
- elif risk_lower == "more_info_needed":
90
- return "📊 **Assessment Pending** *(More data needed)*"
91
- else:
92
- return f"ℹ️ **{risk_val.title()}**"
93
- return str(risk_val)
94
-
95
- # Format deforestation data
96
- def format_deforestation(def_val):
97
- if not def_val or def_val in ["Not available", "not available"]:
98
- return "🔍 **No Data Available**"
99
- try:
100
- def_num = float(def_val)
101
- if def_num == 0:
102
- return "✅ **No Recent Deforestation Detected**"
103
- elif def_num < 0.1:
104
- return f"⚠️ **{def_num:.3f} hectares detected**"
105
- else:
106
- return f"⚠️ **{def_num:.2f} hectares detected**"
107
- except:
108
- return f"ℹ️ **{def_val}**"
109
-
110
- # Create EUDR compliance assessment
111
- def get_compliance_status(def_after_2020):
112
- try:
113
- def_num = float(def_after_2020)
114
- if def_num == 0:
115
- return "✅ **COMPLIANT** - No recent deforestation detected"
116
- elif def_num > 0:
117
- return "⚠️ **REQUIRES ATTENTION** - Recent deforestation detected"
118
- except:
119
- return "🔍 **ASSESSMENT NEEDED** - Insufficient data for compliance determination"
120
-
121
- deforestation_formatted = format_deforestation(def_after_2020_raw)
122
- compliance_status = get_compliance_status(def_after_2020_raw)
123
-
124
- output = f"""🌍 **Geographic Analysis Results**
125
-
126
- 📍 **Location Details**
127
- - **Country**: {country}
128
- - **Administrative Region**: {admin_level}
129
- - **Total Area**: {area_text}
130
-
131
- ⚠️ **Deforestation Risk Assessment**
132
- *Risk levels are based on historical patterns, environmental factors, and land use data*
133
-
134
- - **Overall Risk**: {format_risk(risk_level)}
135
- - **Permanent Crops**: {format_risk(risk_pcrop)}
136
- *Coffee, cocoa, palm oil, fruit trees*
137
- - **Annual Crops**: {format_risk(risk_acrop)}
138
- *Soy, corn, rice, vegetables*
139
- - **Timber Extraction**: {format_risk(risk_timber)}
140
- *Logging and wood harvesting*
141
-
142
- 🌳 **EUDR Compliance Analysis**
143
- *Based on Tropical Moist Forest satellite monitoring*
144
-
145
- **Recent Deforestation (2020-Present):** {deforestation_formatted}
146
-
147
- **EUDR Compliance Status:** {compliance_status}
148
-
149
- ---
150
- 💡 **Key Insights**
151
- \t **For Suppliers**: {compliance_status.split(' - ')[1] if ' - ' in compliance_status else 'Review compliance requirements carefully'} \n
152
- \t **Risk Factors**: Focus on {', '.join([t.split('*')[1].strip('*') for t in [risk_pcrop, risk_acrop, risk_timber] if 'High' in format_risk(t)])} if any high-risk activities detected \n
153
- \t **Next Steps**: {"Conduct additional due diligence if recent deforestation is detected" if "ATTENTION" in compliance_status else "Continue monitoring and maintain documentation"} \n
154
- """
155
- return output
156
- except Exception as e:
157
- return f"❌ **Analysis Error**\n\nUnable to process the geographic data: {str(e)}\n\n📋 **Troubleshooting:**\n- Verify your GeoJSON file format\n- Check file size (should be < 10MB)\n- Ensure coordinates are valid\n\nPlease try uploading again or contact support."
158
 
159
- def handle_geojson_upload(file):
160
- """Handle GeoJSON file upload and call WHISP API"""
161
- if file is not None:
162
- try:
163
- # Initialize WHISP API client
164
- client = Client("https://giz-chatfed-whisp.hf.space/")
165
-
166
- # Call the API with the uploaded file
167
- result = client.predict(
168
- file=handle_file(file.name),
169
- api_name="/get_statistics"
170
- )
171
-
172
- # Convert result to DataFrame
173
- df = pd.DataFrame(result['data'], columns=result['headers'])
174
-
175
- # Format statistics into readable text
176
- formatted_stats = format_whisp_statistics(df)
177
-
178
- return (
179
- formatted_stats, # Keep formatted statistics for chat
180
- gr.update(visible=True), # Keep status visible
181
- gr.update(visible=False) # Always hide results table
182
- )
183
-
184
- except Exception as e:
185
- error_msg = f"❌ Error processing GeoJSON file: {str(e)}"
186
- return (
187
- error_msg,
188
- gr.update(visible=True), # upload_status
189
- gr.update(visible=False) # results_table
190
- )
191
- else:
192
- return (
193
- "",
194
- gr.update(visible=False), # upload_status
195
- gr.update(visible=False) # results_table
196
- )
197
 
198
  def retrieve_paragraphs(query):
199
  """Connect to retriever and retrieve paragraphs"""
 
4
  import asyncio
5
  from uuid import uuid4
6
  from gradio_client import Client, handle_file
7
+ from utils.whisp_api import handle_geojson_upload
8
 
9
  # Sample questions for examples
10
  SAMPLE_QUESTIONS = {
 
25
  ]
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def retrieve_paragraphs(query):
31
  """Connect to retriever and retrieve paragraphs"""
utils/__pycache__/whisp_api.cpython-310.pyc ADDED
Binary file (5.5 kB). View file
 
utils/whisp_api.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_client import Client, handle_file
3
+ import pandas as pd
4
+
5
def get_value(df, colname, default="Not available"):
    """Fetch a single value from a WhispAPI-style Column/Value dataframe.

    Args:
        df: DataFrame expected to carry a "Column" column with field names
            and a "Value" column with the matching values.
        colname: Field name to look up in the "Column" column.
        default: Returned when the dataframe lacks the expected columns, the
            field is absent, or the stored value is null.

    Returns:
        The first value stored for ``colname``, or ``default``.
    """
    if "Column" not in df.columns or "Value" not in df.columns:
        return default

    matches = df.loc[df["Column"] == colname, "Value"]
    if matches.empty:
        return default

    value = matches.iloc[0]
    # A null cell (None/NaN) carries no information; report it as missing
    # instead of letting "nan" leak into user-facing text downstream.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value
12
+
13
# Three-letter country codes mapped to the names shown to end-users.
_COUNTRY_NAMES = {
    'HND': 'Honduras',
    'GTM': 'Guatemala',
    'ECU': 'Ecuador',
    'COL': 'Colombia',
    'PER': 'Peru',
    'BRA': 'Brazil',
    'BOL': 'Bolivia',
    'CRI': 'Costa Rica',
    'PAN': 'Panama',
    'NIC': 'Nicaragua',
}

# Display text for the risk levels this module recognises (keys are the
# lower-cased, stripped raw values).
_RISK_LABELS = {
    "low": "🟢 **Low Risk**",
    "medium": "🟡 **Medium Risk**",
    "high": "🟠 **High Risk**",
    "very high": "🔴 **Very High Risk**",
    "more_info_needed": "📊 **Assessment Pending** *(More data needed)*",
}


def _is_missing(value):
    """Return True for None, NaN, an empty string or a "not available" marker."""
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip().lower() in ("", "not available")
    try:
        # NaN is the only scalar that compares unequal to itself.
        return bool(value != value)
    except (TypeError, ValueError):
        return False


def _to_float(value):
    """Return ``value`` as a float, or None when it is non-numeric or NaN."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return None if number != number else number


def _format_area(area_raw):
    """Render an area in hectares, with more decimals for smaller areas."""
    area_num = _to_float(area_raw)
    if area_num is None:
        return "Not available" if _is_missing(area_raw) else str(area_raw)
    if area_num < 1:
        return f"{area_num:.3f} hectares"
    if area_num < 100:
        return f"{area_num:.2f} hectares"
    return f"{area_num:,.1f} hectares"


def _format_risk(risk_val):
    """Render a raw risk level as emoji-tagged Markdown."""
    if _is_missing(risk_val):
        return "🔍 **Not Available** *(Analysis pending)*"
    if isinstance(risk_val, str):
        label = _RISK_LABELS.get(risk_val.lower().strip())
        return label if label is not None else f"ℹ️ **{risk_val.title()}**"
    return str(risk_val)


def _format_deforestation(def_val):
    """Render the post-2020 deforestation figure as Markdown."""
    # Test for "missing" explicitly: a plain truthiness check would report a
    # genuine numeric 0 as "no data" instead of "no deforestation".
    if _is_missing(def_val):
        return "🔍 **No Data Available**"
    def_num = _to_float(def_val)
    if def_num is None or def_num < 0:
        # Non-numeric or negative input is shown verbatim rather than being
        # presented as a detected area.
        return f"ℹ️ **{def_val}**"
    if def_num == 0:
        return "✅ **No Recent Deforestation Detected**"
    if def_num < 0.1:
        return f"⚠️ **{def_num:.3f} hectares detected**"
    return f"⚠️ **{def_num:.2f} hectares detected**"


def _compliance_status(def_after_2020):
    """Derive the EUDR compliance line from the post-2020 deforestation figure."""
    def_num = _to_float(def_after_2020)
    if def_num is not None:
        if def_num == 0:
            return "✅ **COMPLIANT** - No recent deforestation detected"
        if def_num > 0:
            return "⚠️ **REQUIRES ATTENTION** - Recent deforestation detected"
    # Missing, non-numeric, NaN and negative values all land here, so the
    # caller always receives a string.
    return "🔍 **ASSESSMENT NEEDED** - Insufficient data for compliance determination"


def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable Markdown text for end-users.

    Args:
        df: WhispAPI-style dataframe with "Column" and "Value" columns, read
            through ``get_value``.

    Returns:
        A Markdown report covering location, risk levels and EUDR compliance.
        If anything goes wrong while building it, a Markdown error message
        with troubleshooting hints is returned instead of raising.
    """
    try:
        country_raw = get_value(df, "Country")
        country = _COUNTRY_NAMES.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_text = _format_area(get_value(df, "Area"))

        risk_level = get_value(df, "risk_level")
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")
        def_after_2020_raw = get_value(df, "TMF_def_after_2020")

        deforestation_formatted = _format_deforestation(def_after_2020_raw)
        compliance_status = _compliance_status(def_after_2020_raw)

        # Name the categories rated high or very high. The labels come from
        # this table, not from parsing the raw values, which contain no
        # Markdown markers to split on.
        category_risks = (
            ("Permanent Crops", risk_pcrop),
            ("Annual Crops", risk_acrop),
            ("Timber Extraction", risk_timber),
        )
        high_risk = [
            label for label, raw in category_risks if "High" in _format_risk(raw)
        ]
        if high_risk:
            risk_factors = f"Focus on {', '.join(high_risk)}"
        else:
            risk_factors = "No high-risk activities detected"

        _, separator, detail = compliance_status.partition(" - ")
        supplier_note = (
            detail if separator else "Review compliance requirements carefully"
        )

        if "ATTENTION" in compliance_status:
            next_steps = (
                "Conduct additional due diligence if recent deforestation is detected"
            )
        else:
            next_steps = "Continue monitoring and maintain documentation"

        # NOTE(review): the leading tab on the three insight lines may render
        # as an indented code block in strict Markdown renderers - confirm in
        # the chat view before changing it.
        lines = [
            "🌍 **Geographic Analysis Results**",
            "",
            "📍 **Location Details**",
            f"- **Country**: {country}",
            f"- **Administrative Region**: {admin_level}",
            f"- **Total Area**: {area_text}",
            "",
            "⚠️ **Deforestation Risk Assessment**",
            "*Risk levels are based on historical patterns, environmental factors, and land use data*",
            "",
            f"- **Overall Risk**: {_format_risk(risk_level)}",
            f"- **Permanent Crops**: {_format_risk(risk_pcrop)}",
            "*Coffee, cocoa, palm oil, fruit trees*",
            f"- **Annual Crops**: {_format_risk(risk_acrop)}",
            "*Soy, corn, rice, vegetables*",
            f"- **Timber Extraction**: {_format_risk(risk_timber)}",
            "*Logging and wood harvesting*",
            "",
            "🌳 **EUDR Compliance Analysis**",
            "*Based on Tropical Moist Forest satellite monitoring*",
            "",
            f"**Recent Deforestation (2020-Present):** {deforestation_formatted}",
            "",
            f"**EUDR Compliance Status:** {compliance_status}",
            "",
            "---",
            "💡 **Key Insights**",
            f"\t **For Suppliers**: {supplier_note} ",
            "",
            f"\t **Risk Factors**: {risk_factors} ",
            "",
            f"\t **Next Steps**: {next_steps} ",
            "",
            "",
        ]
        return "\n".join(lines)
    except Exception as e:
        # UI boundary: surface the failure as a message instead of raising.
        return f"❌ **Analysis Error**\n\nUnable to process the geographic data: {str(e)}\n\n📋 **Troubleshooting:**\n- Verify your GeoJSON file format\n- Check file size (should be < 10MB)\n- Ensure coordinates are valid\n\nPlease try uploading again or contact support."
136
+
137
def handle_geojson_upload(file):
    """Send an uploaded GeoJSON file to the WHISP API and format the reply.

    Args:
        file: The upload handed over by the UI, or None when nothing is
            selected. Either an object exposing its path as ``.name`` or a
            plain path string is accepted.

    Returns:
        A 3-tuple ``(message, upload_status_update, results_table_update)``.
        ``message`` is the formatted statistics, an error message, or an
        empty string when no file was given. The status component is made
        visible only when a file was given; the results table is always
        hidden.
    """
    if file is None:
        return (
            "",
            gr.update(visible=False),  # upload_status
            gr.update(visible=False),  # results_table
        )

    try:
        # Uploads may arrive as a file-like object with ``.name`` or as a
        # bare path string; fall back to the value itself in the latter case.
        file_path = getattr(file, "name", file)

        client = Client("https://giz-chatfed-whisp.hf.space/")
        result = client.predict(
            file=handle_file(file_path),
            api_name="/get_statistics",
        )

        # The reply is read as a table: rows under 'data', names under 'headers'.
        df = pd.DataFrame(result['data'], columns=result['headers'])
        message = format_whisp_statistics(df)
    except Exception as e:
        # UI boundary: report the failure in the status area instead of raising.
        message = f"❌ Error processing GeoJSON file: {str(e)}"

    return (
        message,
        gr.update(visible=True),   # upload_status
        gr.update(visible=False),  # results_table
    )