Update app.py
Browse files
app.py
CHANGED
@@ -12,8 +12,10 @@ from typing import Optional, Tuple, Dict, Any
|
|
12 |
import logging
|
13 |
from datetime import datetime
|
14 |
import re
|
15 |
-
|
16 |
-
from
|
|
|
|
|
17 |
|
18 |
# Configure logging
|
19 |
logging.basicConfig(level=logging.INFO)
|
@@ -24,6 +26,8 @@ class EnhancedDataAnalyzer:
|
|
24 |
self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
|
25 |
self.max_file_size = 50 * 1024 * 1024 # 50MB limit
|
26 |
self.conversation_history = []
|
|
|
|
|
27 |
|
28 |
def validate_api_key(self, api_key: str) -> bool:
|
29 |
"""Validate API key format"""
|
@@ -129,7 +133,7 @@ Format your response with clear sections and bullet points for readability."""
|
|
129 |
logger.error(f"API Error: {str(e)}")
|
130 |
return f"β **Connection Error**: {str(e)}"
|
131 |
|
132 |
-
def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str,
|
133 |
"""Enhanced file processing with better error handling"""
|
134 |
try:
|
135 |
file_extension = os.path.splitext(file_path)[1].lower()
|
@@ -153,11 +157,14 @@ Format your response with clear sections and bullet points for readability."""
|
|
153 |
# Clean column names
|
154 |
df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
|
155 |
|
|
|
|
|
|
|
156 |
# Generate enhanced summaries
|
157 |
data_summary = self.generate_enhanced_summary(df)
|
158 |
-
|
159 |
|
160 |
-
return df, data_summary,
|
161 |
|
162 |
except Exception as e:
|
163 |
raise Exception(f"Error processing file: {str(e)}")
|
@@ -223,28 +230,231 @@ Format your response with clear sections and bullet points for readability."""
|
|
223 |
|
224 |
return "\n".join(summary)
|
225 |
|
226 |
-
def
|
227 |
-
"""Generate
|
228 |
-
|
229 |
-
|
230 |
-
# Numerical distribution charts
|
231 |
-
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
232 |
-
if len(numeric_cols) > 0:
|
233 |
-
for col in numeric_cols[:3]: # First 3 numeric columns
|
234 |
-
fig = px.histogram(df, x=col, title=f"Distribution of {col}")
|
235 |
-
charts[f"hist_{col}"] = fig
|
236 |
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
# Initialize the analyzer
|
250 |
analyzer = EnhancedDataAnalyzer()
|
@@ -252,31 +462,29 @@ analyzer = EnhancedDataAnalyzer()
|
|
252 |
async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
253 |
"""Enhanced analysis function with progress tracking"""
|
254 |
if not file:
|
255 |
-
return "β Please upload a CSV or Excel file.", "", "", None
|
256 |
-
|
257 |
if not analyzer.validate_api_key(api_key):
|
258 |
-
return "β Please enter a valid Chutes API key (minimum 10 characters).", "", "", None
|
259 |
-
|
260 |
# Validate file
|
261 |
is_valid, validation_msg = analyzer.validate_file(file)
|
262 |
if not is_valid:
|
263 |
-
return f"β {validation_msg}", "", "", None
|
264 |
-
|
265 |
progress(0.1, desc="π Reading file...")
|
266 |
-
|
267 |
try:
|
268 |
# Process the uploaded file
|
269 |
-
df, data_summary,
|
270 |
progress(0.3, desc="π Processing data...")
|
271 |
-
|
272 |
-
# Generate visualizations
|
273 |
-
chart_html = create_basic_charts(df)
|
274 |
progress(0.5, desc="π€ Generating AI insights...")
|
275 |
-
|
276 |
# Get AI analysis
|
277 |
ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
|
278 |
progress(0.9, desc="β¨ Finalizing results...")
|
279 |
-
|
280 |
# Format the complete response
|
281 |
response = f"""# π― Analysis Complete!
|
282 |
|
@@ -286,57 +494,46 @@ async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
|
286 |
*Analysis powered by OpenAI gpt-oss-20b via Chutes β’ Generated at {datetime.now().strftime('%H:%M:%S')}*
|
287 |
"""
|
288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
progress(1.0, desc="β
Done!")
|
290 |
-
return response, data_summary,
|
291 |
-
|
292 |
-
except Exception as e:
|
293 |
-
logger.error(f"Analysis error: {str(e)}")
|
294 |
-
return f"β **Error**: {str(e)}", "", "", None
|
295 |
|
296 |
-
def create_basic_charts(df: pd.DataFrame) -> str:
|
297 |
-
"""Create basic visualizations for the dataset"""
|
298 |
-
charts_html = []
|
299 |
-
|
300 |
-
try:
|
301 |
-
# Chart 1: Data completeness heatmap
|
302 |
-
missing_data = df.isnull().sum()
|
303 |
-
if missing_data.sum() > 0:
|
304 |
-
fig = px.bar(x=missing_data.index, y=missing_data.values,
|
305 |
-
title="Missing Data by Column",
|
306 |
-
labels={'x': 'Columns', 'y': 'Missing Count'})
|
307 |
-
fig.update_layout(height=400, showlegend=False)
|
308 |
-
charts_html.append(fig.to_html(include_plotlyjs='cdn'))
|
309 |
-
|
310 |
-
# Chart 2: Numerical columns correlation (if multiple numeric columns)
|
311 |
-
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
312 |
-
if len(numeric_cols) > 1:
|
313 |
-
corr_matrix = df[numeric_cols].corr()
|
314 |
-
fig = px.imshow(corr_matrix,
|
315 |
-
title="Correlation Matrix",
|
316 |
-
color_continuous_scale='RdBu_r',
|
317 |
-
aspect="auto")
|
318 |
-
fig.update_layout(height=500)
|
319 |
-
charts_html.append(fig.to_html(include_plotlyjs='cdn'))
|
320 |
-
|
321 |
-
# Chart 3: Distribution of first numeric column
|
322 |
-
if len(numeric_cols) > 0:
|
323 |
-
first_numeric = numeric_cols[0]
|
324 |
-
fig = px.histogram(df, x=first_numeric,
|
325 |
-
title=f"Distribution: {first_numeric}",
|
326 |
-
marginal="box")
|
327 |
-
fig.update_layout(height=400)
|
328 |
-
charts_html.append(fig.to_html(include_plotlyjs='cdn'))
|
329 |
-
|
330 |
-
# Additional charts from generate_chart_data
|
331 |
-
charts_data = analyzer.generate_chart_data(df)
|
332 |
-
for key, fig in charts_data.items():
|
333 |
-
charts_html.append(fig.to_html(include_plotlyjs='cdn'))
|
334 |
-
|
335 |
-
return "\n".join(charts_html) if charts_html else "<p>No charts generated for this dataset.</p>"
|
336 |
-
|
337 |
except Exception as e:
|
338 |
-
logger.error(f"
|
339 |
-
return f"
|
340 |
|
341 |
def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
342 |
"""Synchronous wrapper for the async analyze function"""
|
@@ -344,15 +541,43 @@ def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
|
344 |
|
345 |
def clear_all():
|
346 |
"""Clear all inputs and outputs"""
|
347 |
-
|
|
|
|
|
348 |
|
349 |
-
def
|
350 |
-
"""Generate downloadable
|
351 |
if not analysis_text:
|
352 |
-
return None
|
|
|
|
|
|
|
353 |
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
|
|
356 |
|
357 |
## AI Analysis:
|
358 |
{analysis_text}
|
@@ -360,35 +585,15 @@ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
|
360 |
## Raw Data Summary:
|
361 |
{data_summary}
|
362 |
"""
|
363 |
-
|
364 |
-
base_filename = f"data_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
365 |
-
filename = None
|
366 |
-
|
367 |
-
try:
|
368 |
-
if format_choice == "PDF":
|
369 |
-
# Convert MD to HTML first
|
370 |
-
report_html = markdown.markdown(report_md)
|
371 |
-
# Wrap in basic HTML structure for better PDF rendering
|
372 |
-
full_html = f"""
|
373 |
-
<html>
|
374 |
-
<head><style>body {{ font-family: Arial, sans-serif; }}</style></head>
|
375 |
-
<body>{report_html}</body>
|
376 |
-
</html>
|
377 |
-
"""
|
378 |
-
filename = base_filename + ".pdf"
|
379 |
-
WeasyHTML(string=full_html).write_pdf(filename)
|
380 |
-
|
381 |
-
elif format_choice == "HTML":
|
382 |
-
report_html = markdown.markdown(report_md, output_format='html5')
|
383 |
-
filename = base_filename + ".html"
|
384 |
with open(filename, 'w', encoding='utf-8') as f:
|
385 |
-
f.write(
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
except Exception as e:
|
390 |
-
logger.error(f"
|
391 |
-
return None
|
392 |
|
393 |
# Create enhanced Gradio interface
|
394 |
with gr.Blocks(
|
@@ -408,15 +613,25 @@ with gr.Blocks(
|
|
408 |
text-align: center;
|
409 |
background: #f8f9ff;
|
410 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
"""
|
412 |
) as app:
|
413 |
|
|
|
|
|
|
|
414 |
# Header
|
415 |
gr.Markdown("""
|
416 |
# π Smart Data Analyzer Pro
|
417 |
### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
|
418 |
|
419 |
-
Upload your data files and get instant professional insights, visualizations, and
|
420 |
""")
|
421 |
|
422 |
# Main interface
|
@@ -483,7 +698,8 @@ with gr.Blocks(
|
|
483 |
with gr.Tab("π Visualizations"):
|
484 |
charts_output = gr.HTML(
|
485 |
label="Auto-Generated Charts",
|
486 |
-
value="<p
|
|
|
487 |
)
|
488 |
|
489 |
with gr.Tab("π Raw Summary"):
|
@@ -494,15 +710,20 @@ with gr.Blocks(
|
|
494 |
show_copy_button=True
|
495 |
)
|
496 |
|
497 |
-
with gr.Tab("πΎ Export"):
|
498 |
-
gr.Markdown("### Download Your Analysis Report")
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
|
|
|
|
|
|
|
|
|
|
506 |
|
507 |
# Event handlers
|
508 |
def update_file_stats(file):
|
@@ -516,19 +737,35 @@ with gr.Blocks(
|
|
516 |
except:
|
517 |
return "File information unavailable"
|
518 |
|
519 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
520 |
analyze_btn.click(
|
521 |
-
fn=
|
522 |
inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
|
523 |
-
outputs=[analysis_output, raw_summary, data_preview, charts_output],
|
524 |
show_progress=True
|
525 |
)
|
526 |
|
527 |
# Follow-up questions
|
528 |
ask_btn.click(
|
529 |
-
fn=
|
530 |
inputs=[file_input, api_key_input, question_input],
|
531 |
-
outputs=[question_output
|
532 |
show_progress=True
|
533 |
)
|
534 |
|
@@ -543,14 +780,14 @@ with gr.Blocks(
|
|
543 |
clear_btn.click(
|
544 |
fn=clear_all,
|
545 |
outputs=[file_input, api_key_input, question_input, analysis_output,
|
546 |
-
question_output, data_preview, charts_output]
|
547 |
)
|
548 |
|
549 |
-
#
|
550 |
download_btn.click(
|
551 |
-
fn=
|
552 |
-
inputs=[analysis_output, raw_summary, format_choice],
|
553 |
-
outputs=[download_file]
|
554 |
)
|
555 |
|
556 |
# Footer with usage tips
|
@@ -563,6 +800,18 @@ with gr.Blocks(
|
|
563 |
- Use descriptive column names
|
564 |
- Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
|
565 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
566 |
**β‘ Speed Optimization:**
|
567 |
- Files under 10MB process fastest
|
568 |
- CSV files typically load faster than Excel
|
@@ -571,14 +820,13 @@ with gr.Blocks(
|
|
571 |
**π§ Supported Formats:** CSV, XLSX, XLS | **π Max Size:** 50MB | **π Response Time:** ~3-5 seconds
|
572 |
""")
|
573 |
|
|
|
|
|
|
|
|
|
574 |
# Launch configuration
|
575 |
if __name__ == "__main__":
|
576 |
app.queue(max_size=10) # Handle multiple users
|
577 |
app.launch(
|
578 |
-
share=True
|
579 |
-
server_name="0.0.0.0",
|
580 |
-
server_port=7860,
|
581 |
-
show_error=True,
|
582 |
-
favicon_path=None,
|
583 |
-
ssl_verify=False
|
584 |
)
|
|
|
12 |
import logging
|
13 |
from datetime import datetime
|
14 |
import re
|
15 |
+
import base64
|
16 |
+
from io import BytesIO
|
17 |
+
import weasyprint # For PDF generation
|
18 |
+
from jinja2 import Template # For HTML templating
|
19 |
|
20 |
# Configure logging
|
21 |
logging.basicConfig(level=logging.INFO)
|
|
|
26 |
self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
|
27 |
self.max_file_size = 50 * 1024 * 1024 # 50MB limit
|
28 |
self.conversation_history = []
|
29 |
+
self.current_df = None
|
30 |
+
self.current_charts = None
|
31 |
|
32 |
def validate_api_key(self, api_key: str) -> bool:
|
33 |
"""Validate API key format"""
|
|
|
133 |
logger.error(f"API Error: {str(e)}")
|
134 |
return f"β **Connection Error**: {str(e)}"
|
135 |
|
136 |
+
def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
|
137 |
"""Enhanced file processing with better error handling"""
|
138 |
try:
|
139 |
file_extension = os.path.splitext(file_path)[1].lower()
|
|
|
157 |
# Clean column names
|
158 |
df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
|
159 |
|
160 |
+
# Store dataframe for visualizations
|
161 |
+
self.current_df = df
|
162 |
+
|
163 |
# Generate enhanced summaries
|
164 |
data_summary = self.generate_enhanced_summary(df)
|
165 |
+
charts_html = self.generate_visualizations(df)
|
166 |
|
167 |
+
return df, data_summary, charts_html
|
168 |
|
169 |
except Exception as e:
|
170 |
raise Exception(f"Error processing file: {str(e)}")
|
|
|
230 |
|
231 |
return "\n".join(summary)
|
232 |
|
233 |
+
def generate_visualizations(self, df: pd.DataFrame) -> str:
    """Build the HTML chart gallery for ``df``.

    Sections are emitted in this order, each as an ``<h3>`` heading followed
    by one or more Plotly fragments:

    1. missing values per column (only when at least one value is missing),
    2. correlation heatmap (only with two or more numeric columns),
    3. histograms for the first three numeric columns,
    4. top-10 value counts for those of the first two object/category
       columns that have at most 20 distinct values,
    5. a dataset overview bar chart (always).

    Side effect: the list of fragments is stored in ``self.current_charts``
    on success and reset to ``None`` on failure, so a failed run can never
    leave the previous dataset's charts behind.

    Args:
        df: The loaded dataset.

    Returns:
        The fragments joined with newlines, or a ``<p>`` error message when
        chart generation raised.
    """
    from html import escape  # local import; only the error path needs it

    # NOTE(review): the 'π' / 'β' glyphs in the titles below look like
    # mis-decoded UTF-8 emoji; they are kept exactly as found.
    charts_html = []
    plotly_js_emitted = False

    def render(fig, div_id):
        """Serialise ``fig`` as an embeddable <div>, loading plotly.js once."""
        nonlocal plotly_js_emitted
        fragment = fig.to_html(
            # Fragments are concatenated into one page, so they must not each
            # be a complete <html> document.
            full_html=False,
            # One <script> tag for the whole gallery is enough.
            include_plotlyjs=False if plotly_js_emitted else 'cdn',
            div_id=div_id,
        )
        plotly_js_emitted = True
        return fragment

    try:
        # Chart 1: Data completeness analysis
        missing_data = df.isnull().sum()
        total_missing = missing_data.sum()
        if total_missing > 0:
            fig = px.bar(
                x=missing_data.index,
                y=missing_data.values,
                title="π Missing Data Analysis",
                labels={'x': 'Columns', 'y': 'Missing Values Count'},
                color=missing_data.values,
                color_continuous_scale='Reds',
            )
            fig.update_layout(
                height=400,
                showlegend=False,
                title_x=0.5,
                xaxis_tickangle=-45,
            )
            charts_html.append("<h3>π Data Quality Overview</h3>")
            charts_html.append(render(fig, "missing_data_chart"))

        # Chart 2: Numerical columns correlation heatmap
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            fig = px.imshow(
                corr_matrix,
                title="π Correlation Matrix - Numerical Variables",
                color_continuous_scale='RdBu_r',
                aspect="auto",
                text_auto=True,
            )
            fig.update_layout(height=500, title_x=0.5)
            charts_html.append("<h3>π Correlation Analysis</h3>")
            charts_html.append(render(fig, "correlation_chart"))

        # Chart 3: Distribution plots for the first 3 numeric columns
        for i, col in enumerate(numeric_cols[:3]):
            fig = px.histogram(
                df,
                x=col,
                title=f"π Distribution: {col}",
                marginal="box",
                nbins=30,
            )
            fig.update_layout(height=400, title_x=0.5)
            if i == 0:
                charts_html.append("<h3>π Data Distributions</h3>")
            charts_html.append(render(fig, f"dist_chart_{i}"))

        # Chart 4: Categorical analysis for the first 2 categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        categorical_heading_added = False
        for i, col in enumerate(categorical_cols[:2]):
            if df[col].nunique() > 20:
                # Too many categories for a readable bar chart.
                continue
            value_counts = df[col].value_counts().head(10)
            fig = px.bar(
                x=value_counts.values,
                y=value_counts.index,
                orientation='h',
                title=f"π Top 10 Values: {col}",
                labels={'x': 'Count', 'y': col},
            )
            fig.update_layout(height=400, title_x=0.5)
            # Track the heading with its own flag: the first column may have
            # been skipped above, in which case ``i == 0`` never fires.
            if not categorical_heading_added:
                charts_html.append("<h3>π Categorical Data Analysis</h3>")
                categorical_heading_added = True
            charts_html.append(render(fig, f"cat_chart_{i}"))

        # Chart 5: Data overview summary
        summary_data = {
            'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns',
                       'Categorical Columns', 'Missing Values'],
            'Count': [
                len(df),
                len(df.columns),
                len(numeric_cols),
                len(categorical_cols),
                total_missing,
            ],
        }
        fig = px.bar(
            summary_data,
            x='Metric',
            y='Count',
            title="π Dataset Overview",
            color='Count',
            color_continuous_scale='Blues',
        )
        fig.update_layout(height=400, title_x=0.5, showlegend=False)
        charts_html.append("<h3>π Dataset Overview</h3>")
        charts_html.append(render(fig, "overview_chart"))

        # Store charts for export
        self.current_charts = charts_html

        return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"

    except Exception as e:
        # Drop any charts cached from an earlier dataset so an export cannot
        # pair them with this (failed) analysis.
        self.current_charts = None
        logger.error(f"Chart generation error: {str(e)}")
        # The message may quote column names from the uploaded file; escape
        # it before placing it in markup.
        return f"<p>β Chart generation failed: {escape(str(e))}</p>"
|
338 |
+
|
339 |
+
def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
    """Render the standalone HTML report (header, analysis, charts, summary).

    The AI analysis is treated as lightweight Markdown: ``#``/``##``/``###``
    headings and ``**bold**`` are converted to HTML, and remaining newlines
    become ``<br>``. All caller-supplied text is HTML-escaped first because
    the Jinja2 ``Template`` used here does not autoescape. Chart markup is
    taken from ``self.current_charts`` and inserted as-is.

    Args:
        analysis_text: Markdown-ish analysis text.
        data_summary: Plain-text summary shown inside a ``<pre>`` block.
        file_name: Name shown in the metadata box.

    Returns:
        The complete HTML document as a string.
    """
    from html import escape  # local import keeps the block self-contained

    # NOTE(review): the 'π' / 'β' glyphs in the template look like
    # mis-decoded UTF-8 emoji; they are kept as found.
    html_template = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Data Analysis Report</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: #f8f9fa;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
            text-align: center;
        }
        .section {
            background: white;
            padding: 25px;
            margin-bottom: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .chart-container {
            margin: 20px 0;
            padding: 15px;
            background: #f8f9ff;
            border-radius: 8px;
            border-left: 4px solid #667eea;
        }
        h1, h2, h3 { color: #2c3e50; }
        .metadata {
            background: #e8f4f8;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        .footer {
            text-align: center;
            color: #666;
            margin-top: 40px;
            padding: 20px;
            background: #f1f1f1;
            border-radius: 5px;
        }
        pre {
            background: #f4f4f4;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
            white-space: pre-wrap;
        }
    </style>
</head>
<body>
    <div class="header">
        <h1>π Smart Data Analysis Report</h1>
        <p>Comprehensive AI-Powered Data Insights</p>
    </div>

    <div class="metadata">
        <strong>π File:</strong> {{ file_name }}<br>
        <strong>π Generated:</strong> {{ timestamp }}<br>
        <strong>π€ Model:</strong> OpenAI gpt-oss-20b via Chutes AI
    </div>

    <div class="section">
        <h2>π― AI Analysis & Insights</h2>
        <div>{{ ai_analysis }}</div>
    </div>

    <div class="section">
        <h2>π Visualizations</h2>
        <div class="chart-container">
            {{ charts_html }}
        </div>
    </div>

    <div class="section">
        <h2>π Technical Data Summary</h2>
        <pre>{{ data_summary }}</pre>
    </div>

    <div class="footer">
        <p>Report generated by Smart Data Analyzer Pro β’ Powered by AI</p>
        <p>For questions or support, visit chutes.ai</p>
    </div>
</body>
</html>
"""

    template = Template(html_template)

    def _heading(match):
        # '#' -> <h2>, '##' -> <h3>, '###' -> <h4>; <h1> is the report title.
        level = len(match.group(1)) + 1
        return f"<h{level}>{match.group(2)}</h{level}>"

    # Escape first so model output cannot inject markup, then add our own
    # tags. Headings must be converted while line breaks still exist;
    # replacing '\n' beforehand leaves the heading patterns nothing to match.
    ai_analysis_html = escape(analysis_text or "", quote=False)
    ai_analysis_html = re.sub(
        r'^(#{1,3}) +(.+?) *(?:\n|\Z)',
        _heading,
        ai_analysis_html,
        flags=re.MULTILINE,
    )
    ai_analysis_html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', ai_analysis_html)
    ai_analysis_html = ai_analysis_html.replace('\n', '<br>')

    # Chart fragments are generated by this class, so they are inserted raw.
    charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"

    return template.render(
        file_name=escape(str(file_name)),
        timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        ai_analysis=ai_analysis_html,
        charts_html=charts_content,
        data_summary=escape(str(data_summary), quote=False),
    )
|
458 |
|
459 |
# Initialize the analyzer
|
460 |
analyzer = EnhancedDataAnalyzer()
|
|
|
462 |
async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
463 |
"""Enhanced analysis function with progress tracking"""
|
464 |
if not file:
|
465 |
+
return "β Please upload a CSV or Excel file.", "", "", "", None
|
466 |
+
|
467 |
if not analyzer.validate_api_key(api_key):
|
468 |
+
return "β Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None
|
469 |
+
|
470 |
# Validate file
|
471 |
is_valid, validation_msg = analyzer.validate_file(file)
|
472 |
if not is_valid:
|
473 |
+
return f"β {validation_msg}", "", "", "", None
|
474 |
+
|
475 |
progress(0.1, desc="π Reading file...")
|
476 |
+
|
477 |
try:
|
478 |
# Process the uploaded file
|
479 |
+
df, data_summary, charts_html = analyzer.process_file(file.name)
|
480 |
progress(0.3, desc="π Processing data...")
|
481 |
+
|
|
|
|
|
482 |
progress(0.5, desc="π€ Generating AI insights...")
|
483 |
+
|
484 |
# Get AI analysis
|
485 |
ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
|
486 |
progress(0.9, desc="β¨ Finalizing results...")
|
487 |
+
|
488 |
# Format the complete response
|
489 |
response = f"""# π― Analysis Complete!
|
490 |
|
|
|
494 |
*Analysis powered by OpenAI gpt-oss-20b via Chutes β’ Generated at {datetime.now().strftime('%H:%M:%S')}*
|
495 |
"""
|
496 |
|
497 |
+
# Generate data preview
|
498 |
+
data_preview_html = df.head(15).to_html(
|
499 |
+
classes="table table-striped table-hover",
|
500 |
+
table_id="data-preview-table",
|
501 |
+
escape=False
|
502 |
+
)
|
503 |
+
|
504 |
+
# Add some styling to the preview
|
505 |
+
styled_preview = f"""
|
506 |
+
<style>
|
507 |
+
#data-preview-table {{
|
508 |
+
width: 100%;
|
509 |
+
border-collapse: collapse;
|
510 |
+
margin: 20px 0;
|
511 |
+
font-size: 14px;
|
512 |
+
}}
|
513 |
+
#data-preview-table th {{
|
514 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
515 |
+
color: white;
|
516 |
+
padding: 12px 8px;
|
517 |
+
text-align: left;
|
518 |
+
font-weight: bold;
|
519 |
+
}}
|
520 |
+
#data-preview-table td {{
|
521 |
+
padding: 10px 8px;
|
522 |
+
border-bottom: 1px solid #ddd;
|
523 |
+
}}
|
524 |
+
#data-preview-table tr:hover {{
|
525 |
+
background-color: #f5f5f5;
|
526 |
+
}}
|
527 |
+
</style>
|
528 |
+
{data_preview_html}
|
529 |
+
"""
|
530 |
+
|
531 |
progress(1.0, desc="β
Done!")
|
532 |
+
return response, data_summary, styled_preview, charts_html, file.name
|
|
|
|
|
|
|
|
|
533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
except Exception as e:
|
535 |
+
logger.error(f"Analysis error: {str(e)}")
|
536 |
+
return f"β **Error**: {str(e)}", "", "", "", None
|
537 |
|
538 |
def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
|
539 |
"""Synchronous wrapper for the async analyze function"""
|
|
|
541 |
|
542 |
def clear_all():
    """Forget the analyzer's cached data and blank the bound UI components.

    Returns an 8-tuple: ``None`` for the first output, six empty strings,
    and ``None`` for the last output.
    """
    # Drop the cached dataframe and chart fragments.
    for cached_attr in ("current_df", "current_charts"):
        setattr(analyzer, cached_attr, None)

    empty_fields = ("",) * 6
    return (None, *empty_fields, None)
|
547 |
|
548 |
+
def download_report(analysis_text, data_summary, file_name, format_choice):
|
549 |
+
"""Generate downloadable report in PDF or HTML format"""
|
550 |
if not analysis_text:
|
551 |
+
return None, "β No analysis data available for download."
|
552 |
+
|
553 |
+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
554 |
+
file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis"
|
555 |
|
556 |
+
try:
|
557 |
+
if format_choice == "HTML":
|
558 |
+
# Generate HTML report
|
559 |
+
html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
|
560 |
+
filename = f"{file_base_name}_analysis_report_{timestamp}.html"
|
561 |
+
|
562 |
+
with open(filename, 'w', encoding='utf-8') as f:
|
563 |
+
f.write(html_content)
|
564 |
+
|
565 |
+
return filename, f"β
HTML report generated successfully! File: {filename}"
|
566 |
+
|
567 |
+
elif format_choice == "PDF":
|
568 |
+
# Generate PDF report
|
569 |
+
html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
|
570 |
+
filename = f"{file_base_name}_analysis_report_{timestamp}.pdf"
|
571 |
+
|
572 |
+
# Convert HTML to PDF using weasyprint
|
573 |
+
weasyprint.HTML(string=html_content).write_pdf(filename)
|
574 |
+
|
575 |
+
return filename, f"β
PDF report generated successfully! File: {filename}"
|
576 |
+
|
577 |
+
else: # Markdown fallback
|
578 |
+
report = f"""# Data Analysis Report
|
579 |
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
580 |
+
File: {file_name}
|
581 |
|
582 |
## AI Analysis:
|
583 |
{analysis_text}
|
|
|
585 |
## Raw Data Summary:
|
586 |
{data_summary}
|
587 |
"""
|
588 |
+
filename = f"{file_base_name}_analysis_report_{timestamp}.md"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
589 |
with open(filename, 'w', encoding='utf-8') as f:
|
590 |
+
f.write(report)
|
591 |
+
|
592 |
+
return filename, f"β
Markdown report generated successfully! File: {filename}"
|
593 |
+
|
594 |
except Exception as e:
|
595 |
+
logger.error(f"Report generation error: {str(e)}")
|
596 |
+
return None, f"β Error generating report: {str(e)}"
|
597 |
|
598 |
# Create enhanced Gradio interface
|
599 |
with gr.Blocks(
|
|
|
613 |
text-align: center;
|
614 |
background: #f8f9ff;
|
615 |
}
|
616 |
+
.charts-container {
|
617 |
+
max-height: 800px;
|
618 |
+
overflow-y: auto;
|
619 |
+
padding: 10px;
|
620 |
+
background: #fafafa;
|
621 |
+
border-radius: 8px;
|
622 |
+
}
|
623 |
"""
|
624 |
) as app:
|
625 |
|
626 |
+
# Store file name for downloads
|
627 |
+
current_file_name = gr.State("")
|
628 |
+
|
629 |
# Header
|
630 |
gr.Markdown("""
|
631 |
# π Smart Data Analyzer Pro
|
632 |
### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
|
633 |
|
634 |
+
Upload your data files and get instant professional insights, visualizations, and downloadable reports!
|
635 |
""")
|
636 |
|
637 |
# Main interface
|
|
|
698 |
with gr.Tab("π Visualizations"):
|
699 |
charts_output = gr.HTML(
|
700 |
label="Auto-Generated Charts",
|
701 |
+
value="<div class='charts-container'><p>π Interactive charts will appear here after analysis...</p></div>",
|
702 |
+
elem_classes=["charts-container"]
|
703 |
)
|
704 |
|
705 |
with gr.Tab("π Raw Summary"):
|
|
|
710 |
show_copy_button=True
|
711 |
)
|
712 |
|
713 |
+
with gr.Tab("πΎ Export Reports"):
|
714 |
+
gr.Markdown("### π₯ Download Your Analysis Report")
|
715 |
+
|
716 |
+
with gr.Row():
|
717 |
+
format_choice = gr.Radio(
|
718 |
+
choices=["HTML", "PDF", "Markdown"],
|
719 |
+
value="HTML",
|
720 |
+
label="π Report Format",
|
721 |
+
info="Choose your preferred download format"
|
722 |
+
)
|
723 |
+
|
724 |
+
download_btn = gr.Button("π₯ Generate & Download Report", variant="primary", size="lg")
|
725 |
+
download_status = gr.Textbox(label="Download Status", interactive=False)
|
726 |
+
download_file = gr.File(label="π Download Link", visible=True)
|
727 |
|
728 |
# Event handlers
|
729 |
def update_file_stats(file):
|
|
|
737 |
except:
|
738 |
return "File information unavailable"
|
739 |
|
740 |
+
def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
    """Run the analysis and return (analysis, summary, preview, charts, filename).

    The fifth element is taken from the result only when the result has
    exactly five items; otherwise an empty string is used.
    """
    outcome = sync_analyze_data(file, api_key, user_question, progress)
    analysis, summary, preview, charts = outcome[0], outcome[1], outcome[2], outcome[3]
    source_name = outcome[4] if len(outcome) == 5 else ""
    return analysis, summary, preview, charts, source_name
|
747 |
+
|
748 |
+
def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
    """Answer a follow-up question about the data.

    Returns a prompt to enter a question when ``question`` is blank;
    otherwise returns only the first element of the analysis result.
    """
    if question.strip():
        # Only the analysis text is shown for follow-up questions.
        return sync_analyze_data(file, api_key, question, progress)[0]
    return "β Please enter a specific question about your data."
|
755 |
+
|
756 |
+
# Main analysis event
|
757 |
analyze_btn.click(
|
758 |
+
fn=handle_analysis,
|
759 |
inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
|
760 |
+
outputs=[analysis_output, raw_summary, data_preview, charts_output, current_file_name],
|
761 |
show_progress=True
|
762 |
)
|
763 |
|
764 |
# Follow-up questions
|
765 |
ask_btn.click(
|
766 |
+
fn=handle_question_analysis,
|
767 |
inputs=[file_input, api_key_input, question_input],
|
768 |
+
outputs=[question_output],
|
769 |
show_progress=True
|
770 |
)
|
771 |
|
|
|
780 |
clear_btn.click(
|
781 |
fn=clear_all,
|
782 |
outputs=[file_input, api_key_input, question_input, analysis_output,
|
783 |
+
question_output, data_preview, charts_output, raw_summary]
|
784 |
)
|
785 |
|
786 |
+
# Enhanced download functionality
|
787 |
download_btn.click(
|
788 |
+
fn=download_report,
|
789 |
+
inputs=[analysis_output, raw_summary, current_file_name, format_choice],
|
790 |
+
outputs=[download_file, download_status]
|
791 |
)
|
792 |
|
793 |
# Footer with usage tips
|
|
|
800 |
- Use descriptive column names
|
801 |
- Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
|
802 |
|
803 |
+
**π Visualizations Include:**
|
804 |
+
- Missing data analysis
|
805 |
+
- Correlation matrices for numerical data
|
806 |
+
- Distribution plots and histograms
|
807 |
+
- Top categories for categorical data
|
808 |
+
- Dataset overview metrics
|
809 |
+
|
810 |
+
**π₯ Export Options:**
|
811 |
+
- **HTML**: Interactive report with embedded charts
|
812 |
+
- **PDF**: Professional report for presentations
|
813 |
+
- **Markdown**: Simple text format for documentation
|
814 |
+
|
815 |
**β‘ Speed Optimization:**
|
816 |
- Files under 10MB process fastest
|
817 |
- CSV files typically load faster than Excel
|
|
|
820 |
**π§ Supported Formats:** CSV, XLSX, XLS | **π Max Size:** 50MB | **π Response Time:** ~3-5 seconds
|
821 |
""")
|
822 |
|
823 |
+
def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    """Run the ``analyze_data`` coroutine to completion and return its result."""
    pending_analysis = analyze_data(file, api_key, user_question, progress)
    return asyncio.run(pending_analysis)
|
826 |
+
|
827 |
# Launch configuration
|
828 |
if __name__ == "__main__":
    # Handle multiple users: queue requests, at most 10 waiting at a time.
    app.queue(max_size=10)
    # share=True asks Gradio for a public share link as well as the local one.
    app.launch(share=True)
|