shukdevdattaEX committed on
Commit a620a11 · verified · 1 Parent(s): fc9cc46

Update app.py

Files changed (1)
  1. app.py +393 -145
app.py CHANGED
@@ -12,8 +12,10 @@ from typing import Optional, Tuple, Dict, Any
12
  import logging
13
  from datetime import datetime
14
  import re
15
- # import markdown
16
- from weasyprint import HTML as WeasyHTML
17
 
18
  # Configure logging
19
  logging.basicConfig(level=logging.INFO)
@@ -24,6 +26,8 @@ class EnhancedDataAnalyzer:
24
  self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
25
  self.max_file_size = 50 * 1024 * 1024 # 50MB limit
26
  self.conversation_history = []
27
 
28
  def validate_api_key(self, api_key: str) -> bool:
29
  """Validate API key format"""
@@ -129,7 +133,7 @@ Format your response with clear sections and bullet points for readability."""
129
  logger.error(f"API Error: {str(e)}")
130
  return f"❌ **Connection Error**: {str(e)}"
131
 
132
- def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, dict]:
133
  """Enhanced file processing with better error handling"""
134
  try:
135
  file_extension = os.path.splitext(file_path)[1].lower()
@@ -153,11 +157,14 @@ Format your response with clear sections and bullet points for readability."""
153
  # Clean column names
154
  df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
155
 
156
  # Generate enhanced summaries
157
  data_summary = self.generate_enhanced_summary(df)
158
- charts_data = self.generate_chart_data(df)
159
 
160
- return df, data_summary, charts_data
161
 
162
  except Exception as e:
163
  raise Exception(f"Error processing file: {str(e)}")
@@ -223,28 +230,231 @@ Format your response with clear sections and bullet points for readability."""
223
 
224
  return "\n".join(summary)
225
 
226
- def generate_chart_data(self, df: pd.DataFrame) -> dict:
227
- """Generate data for automatic visualizations"""
228
- charts = {}
229
-
230
- # Numerical distribution charts
231
- numeric_cols = df.select_dtypes(include=[np.number]).columns
232
- if len(numeric_cols) > 0:
233
- for col in numeric_cols[:3]: # First 3 numeric columns
234
- fig = px.histogram(df, x=col, title=f"Distribution of {col}")
235
- charts[f"hist_{col}"] = fig
236
 
237
- # Categorical charts
238
- categorical_cols = df.select_dtypes(include=['object', 'category']).columns
239
- if len(categorical_cols) > 0:
240
- for col in categorical_cols[:2]: # First 2 categorical columns
241
- if df[col].nunique() <= 20: # Only if reasonable number of categories
242
- value_counts = df[col].value_counts().head(10)
243
- fig = px.bar(x=value_counts.index, y=value_counts.values,
244
- title=f"Top Values in {col}")
245
- charts[f"bar_{col}"] = fig
246
 
247
- return charts
248
 
249
  # Initialize the analyzer
250
  analyzer = EnhancedDataAnalyzer()
@@ -252,31 +462,29 @@ analyzer = EnhancedDataAnalyzer()
252
  async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
253
  """Enhanced analysis function with progress tracking"""
254
  if not file:
255
- return "❌ Please upload a CSV or Excel file.", "", "", None
256
-
257
  if not analyzer.validate_api_key(api_key):
258
- return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", None
259
-
260
  # Validate file
261
  is_valid, validation_msg = analyzer.validate_file(file)
262
  if not is_valid:
263
- return f"❌ {validation_msg}", "", "", None
264
-
265
  progress(0.1, desc="πŸ“ Reading file...")
266
-
267
  try:
268
  # Process the uploaded file
269
- df, data_summary, charts_data = analyzer.process_file(file.name)
270
  progress(0.3, desc="πŸ“Š Processing data...")
271
-
272
- # Generate visualizations
273
- chart_html = create_basic_charts(df)
274
  progress(0.5, desc="πŸ€– Generating AI insights...")
275
-
276
  # Get AI analysis
277
  ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
278
  progress(0.9, desc="✨ Finalizing results...")
279
-
280
  # Format the complete response
281
  response = f"""# 🎯 Analysis Complete!
282
 
@@ -286,57 +494,46 @@ async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
286
  *Analysis powered by OpenAI gpt-oss-20b via Chutes β€’ Generated at {datetime.now().strftime('%H:%M:%S')}*
287
  """
288
 
289
  progress(1.0, desc="βœ… Done!")
290
- return response, data_summary, df.head(15).to_html(classes="table table-striped"), chart_html
291
-
292
- except Exception as e:
293
- logger.error(f"Analysis error: {str(e)}")
294
- return f"❌ **Error**: {str(e)}", "", "", None
295
 
296
- def create_basic_charts(df: pd.DataFrame) -> str:
297
- """Create basic visualizations for the dataset"""
298
- charts_html = []
299
-
300
- try:
301
- # Chart 1: Data completeness heatmap
302
- missing_data = df.isnull().sum()
303
- if missing_data.sum() > 0:
304
- fig = px.bar(x=missing_data.index, y=missing_data.values,
305
- title="Missing Data by Column",
306
- labels={'x': 'Columns', 'y': 'Missing Count'})
307
- fig.update_layout(height=400, showlegend=False)
308
- charts_html.append(fig.to_html(include_plotlyjs='cdn'))
309
-
310
- # Chart 2: Numerical columns correlation (if multiple numeric columns)
311
- numeric_cols = df.select_dtypes(include=[np.number]).columns
312
- if len(numeric_cols) > 1:
313
- corr_matrix = df[numeric_cols].corr()
314
- fig = px.imshow(corr_matrix,
315
- title="Correlation Matrix",
316
- color_continuous_scale='RdBu_r',
317
- aspect="auto")
318
- fig.update_layout(height=500)
319
- charts_html.append(fig.to_html(include_plotlyjs='cdn'))
320
-
321
- # Chart 3: Distribution of first numeric column
322
- if len(numeric_cols) > 0:
323
- first_numeric = numeric_cols[0]
324
- fig = px.histogram(df, x=first_numeric,
325
- title=f"Distribution: {first_numeric}",
326
- marginal="box")
327
- fig.update_layout(height=400)
328
- charts_html.append(fig.to_html(include_plotlyjs='cdn'))
329
-
330
- # Additional charts from generate_chart_data
331
- charts_data = analyzer.generate_chart_data(df)
332
- for key, fig in charts_data.items():
333
- charts_html.append(fig.to_html(include_plotlyjs='cdn'))
334
-
335
- return "\n".join(charts_html) if charts_html else "<p>No charts generated for this dataset.</p>"
336
-
337
  except Exception as e:
338
- logger.error(f"Chart generation error: {str(e)}")
339
- return f"<p>Chart generation failed: {str(e)}</p>"
340
 
341
  def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
342
  """Synchronous wrapper for the async analyze function"""
@@ -344,15 +541,43 @@ def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
344
 
345
  def clear_all():
346
  """Clear all inputs and outputs"""
347
- return None, "", "", "", "", "", None
348
 
349
- def download_summary(analysis_text, data_summary, format_choice):
350
- """Generate downloadable summary report in chosen format"""
351
  if not analysis_text:
352
- return None
353
 
354
- report_md = f"""# Data Analysis Report
355
  Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
356
 
357
  ## AI Analysis:
358
  {analysis_text}
@@ -360,35 +585,15 @@ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
360
  ## Raw Data Summary:
361
  {data_summary}
362
  """
363
-
364
- base_filename = f"data_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
365
- filename = None
366
-
367
- try:
368
- if format_choice == "PDF":
369
- # Convert MD to HTML first
370
- report_html = markdown.markdown(report_md)
371
- # Wrap in basic HTML structure for better PDF rendering
372
- full_html = f"""
373
- <html>
374
- <head><style>body {{ font-family: Arial, sans-serif; }}</style></head>
375
- <body>{report_html}</body>
376
- </html>
377
- """
378
- filename = base_filename + ".pdf"
379
- WeasyHTML(string=full_html).write_pdf(filename)
380
-
381
- elif format_choice == "HTML":
382
- report_html = markdown.markdown(report_md, output_format='html5')
383
- filename = base_filename + ".html"
384
  with open(filename, 'w', encoding='utf-8') as f:
385
- f.write(report_html)
386
-
387
- return filename
388
-
389
  except Exception as e:
390
- logger.error(f"Download generation error: {str(e)}")
391
- return None
392
 
393
  # Create enhanced Gradio interface
394
  with gr.Blocks(
@@ -408,15 +613,25 @@ with gr.Blocks(
408
  text-align: center;
409
  background: #f8f9ff;
410
  }
411
  """
412
  ) as app:
413
 
414
  # Header
415
  gr.Markdown("""
416
  # πŸš€ Smart Data Analyzer Pro
417
  ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
418
 
419
- Upload your data files and get instant professional insights, visualizations, and recommendations!
420
  """)
421
 
422
  # Main interface
@@ -483,7 +698,8 @@ with gr.Blocks(
483
  with gr.Tab("πŸ“ˆ Visualizations"):
484
  charts_output = gr.HTML(
485
  label="Auto-Generated Charts",
486
- value="<p>Charts will appear here after analysis...</p>"
 
487
  )
488
 
489
  with gr.Tab("πŸ” Raw Summary"):
@@ -494,15 +710,20 @@ with gr.Blocks(
494
  show_copy_button=True
495
  )
496
 
497
- with gr.Tab("πŸ’Ύ Export"):
498
- gr.Markdown("### Download Your Analysis Report")
499
- format_choice = gr.Dropdown(
500
- choices=["PDF", "HTML"],
501
- label="Choose Format",
502
- value="PDF"
503
- )
504
- download_btn = gr.Button("πŸ“₯ Download Report", variant="secondary")
505
- download_file = gr.File(label="Download Link", visible=False)
506
 
507
  # Event handlers
508
  def update_file_stats(file):
@@ -516,19 +737,35 @@ with gr.Blocks(
516
  except:
517
  return "File information unavailable"
518
 
519
- # Main analysis
520
  analyze_btn.click(
521
- fn=sync_analyze_data,
522
  inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
523
- outputs=[analysis_output, raw_summary, data_preview, charts_output],
524
  show_progress=True
525
  )
526
 
527
  # Follow-up questions
528
  ask_btn.click(
529
- fn=sync_analyze_data,
530
  inputs=[file_input, api_key_input, question_input],
531
- outputs=[question_output, gr.Textbox(visible=False), gr.HTML(visible=False), charts_output], # Update charts on question too
532
  show_progress=True
533
  )
534
 
@@ -543,14 +780,14 @@ with gr.Blocks(
543
  clear_btn.click(
544
  fn=clear_all,
545
  outputs=[file_input, api_key_input, question_input, analysis_output,
546
- question_output, data_preview, charts_output]
547
  )
548
 
549
- # Download functionality
550
  download_btn.click(
551
- fn=download_summary,
552
- inputs=[analysis_output, raw_summary, format_choice],
553
- outputs=[download_file]
554
  )
555
 
556
  # Footer with usage tips
@@ -563,6 +800,18 @@ with gr.Blocks(
563
  - Use descriptive column names
564
  - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
565
 
566
  **⚑ Speed Optimization:**
567
  - Files under 10MB process fastest
568
  - CSV files typically load faster than Excel
@@ -571,14 +820,13 @@ with gr.Blocks(
571
  **πŸ”§ Supported Formats:** CSV, XLSX, XLS | **πŸ“ Max Size:** 50MB | **πŸš€ Response Time:** ~3-5 seconds
572
  """)
573
 
574
  # Launch configuration
575
  if __name__ == "__main__":
576
  app.queue(max_size=10) # Handle multiple users
577
  app.launch(
578
- share=True,
579
- server_name="0.0.0.0",
580
- server_port=7860,
581
- show_error=True,
582
- favicon_path=None,
583
- ssl_verify=False
584
  )
 
12
  import logging
13
  from datetime import datetime
14
  import re
15
+ import base64
16
+ from io import BytesIO
17
+ import weasyprint # For PDF generation
18
+ from jinja2 import Template # For HTML templating
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
 
26
  self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
27
  self.max_file_size = 50 * 1024 * 1024 # 50MB limit
28
  self.conversation_history = []
29
+ self.current_df = None
30
+ self.current_charts = None
31
 
32
  def validate_api_key(self, api_key: str) -> bool:
33
  """Validate API key format"""
 
133
  logger.error(f"API Error: {str(e)}")
134
  return f"❌ **Connection Error**: {str(e)}"
135
 
136
+ def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
137
  """Enhanced file processing with better error handling"""
138
  try:
139
  file_extension = os.path.splitext(file_path)[1].lower()
 
157
  # Clean column names
158
  df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
159
 
160
+ # Store dataframe for visualizations
161
+ self.current_df = df
162
+
163
  # Generate enhanced summaries
164
  data_summary = self.generate_enhanced_summary(df)
165
+ charts_html = self.generate_visualizations(df)
166
 
167
+ return df, data_summary, charts_html
168
 
169
  except Exception as e:
170
  raise Exception(f"Error processing file: {str(e)}")
 
230
 
231
  return "\n".join(summary)
232
 
233
+ def generate_visualizations(self, df: pd.DataFrame) -> str:
234
+ """Generate comprehensive visualizations for the dataset"""
235
+ charts_html = []
236
 
237
+ try:
238
+ # Chart 1: Data completeness analysis
239
+ missing_data = df.isnull().sum()
240
+ if missing_data.sum() > 0:
241
+ fig = px.bar(
242
+ x=missing_data.index,
243
+ y=missing_data.values,
244
+ title="πŸ” Missing Data Analysis",
245
+ labels={'x': 'Columns', 'y': 'Missing Values Count'},
246
+ color=missing_data.values,
247
+ color_continuous_scale='Reds'
248
+ )
249
+ fig.update_layout(
250
+ height=400,
251
+ showlegend=False,
252
+ title_x=0.5,
253
+ xaxis_tickangle=-45
254
+ )
255
+ charts_html.append(f"<h3>πŸ“Š Data Quality Overview</h3>")
256
+ charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="missing_data_chart"))
257
+
258
+ # Chart 2: Numerical columns correlation heatmap
259
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
260
+ if len(numeric_cols) > 1:
261
+ corr_matrix = df[numeric_cols].corr()
262
+ fig = px.imshow(
263
+ corr_matrix,
264
+ title="πŸ”— Correlation Matrix - Numerical Variables",
265
+ color_continuous_scale='RdBu_r',
266
+ aspect="auto",
267
+ text_auto=True
268
+ )
269
+ fig.update_layout(height=500, title_x=0.5)
270
+ charts_html.append(f"<h3>πŸ“ˆ Correlation Analysis</h3>")
271
+ charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="correlation_chart"))
272
+
273
+ # Chart 3: Distribution plots for numerical columns
274
+ if len(numeric_cols) > 0:
275
+ for i, col in enumerate(numeric_cols[:3]): # First 3 numeric columns
276
+ fig = px.histogram(
277
+ df,
278
+ x=col,
279
+ title=f"πŸ“Š Distribution: {col}",
280
+ marginal="box",
281
+ nbins=30
282
+ )
283
+ fig.update_layout(height=400, title_x=0.5)
284
+ if i == 0:
285
+ charts_html.append(f"<h3>πŸ“ˆ Data Distributions</h3>")
286
+ charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"dist_chart_{i}"))
287
+
288
+ # Chart 4: Categorical analysis
289
+ categorical_cols = df.select_dtypes(include=['object', 'category']).columns
290
+ if len(categorical_cols) > 0:
291
+ for i, col in enumerate(categorical_cols[:2]): # First 2 categorical columns
292
+ if df[col].nunique() <= 20: # Only if reasonable number of categories
293
+ value_counts = df[col].value_counts().head(10)
294
+ fig = px.bar(
295
+ x=value_counts.values,
296
+ y=value_counts.index,
297
+ orientation='h',
298
+ title=f"πŸ“Š Top 10 Values: {col}",
299
+ labels={'x': 'Count', 'y': col}
300
+ )
301
+ fig.update_layout(height=400, title_x=0.5)
302
+ if i == 0:
303
+ charts_html.append(f"<h3>πŸ“ Categorical Data Analysis</h3>")
304
+ charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"cat_chart_{i}"))
305
+
306
+ # Chart 5: Data overview summary
307
+ summary_data = {
308
+ 'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns', 'Categorical Columns', 'Missing Values'],
309
+ 'Count': [
310
+ len(df),
311
+ len(df.columns),
312
+ len(numeric_cols),
313
+ len(categorical_cols),
314
+ df.isnull().sum().sum()
315
+ ]
316
+ }
317
+
318
+ fig = px.bar(
319
+ summary_data,
320
+ x='Metric',
321
+ y='Count',
322
+ title="πŸ“‹ Dataset Overview",
323
+ color='Count',
324
+ color_continuous_scale='Blues'
325
+ )
326
+ fig.update_layout(height=400, title_x=0.5, showlegend=False)
327
+ charts_html.append(f"<h3>πŸ“Š Dataset Overview</h3>")
328
+ charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="overview_chart"))
329
+
330
+ # Store charts for export
331
+ self.current_charts = charts_html
332
+
333
+ return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"
334
 
335
+ except Exception as e:
336
+ logger.error(f"Chart generation error: {str(e)}")
337
+ return f"<p>❌ Chart generation failed: {str(e)}</p>"
338
+
339
+ def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
340
+ """Generate HTML report with embedded charts"""
341
+
342
+ html_template = """
343
+ <!DOCTYPE html>
344
+ <html>
345
+ <head>
346
+ <meta charset="UTF-8">
347
+ <title>Data Analysis Report</title>
348
+ <style>
349
+ body {
350
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
351
+ line-height: 1.6;
352
+ color: #333;
353
+ max-width: 1200px;
354
+ margin: 0 auto;
355
+ padding: 20px;
356
+ background: #f8f9fa;
357
+ }
358
+ .header {
359
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
360
+ color: white;
361
+ padding: 30px;
362
+ border-radius: 10px;
363
+ margin-bottom: 30px;
364
+ text-align: center;
365
+ }
366
+ .section {
367
+ background: white;
368
+ padding: 25px;
369
+ margin-bottom: 20px;
370
+ border-radius: 8px;
371
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
372
+ }
373
+ .chart-container {
374
+ margin: 20px 0;
375
+ padding: 15px;
376
+ background: #f8f9ff;
377
+ border-radius: 8px;
378
+ border-left: 4px solid #667eea;
379
+ }
380
+ h1, h2, h3 { color: #2c3e50; }
381
+ .metadata {
382
+ background: #e8f4f8;
383
+ padding: 15px;
384
+ border-radius: 5px;
385
+ margin-bottom: 20px;
386
+ }
387
+ .footer {
388
+ text-align: center;
389
+ color: #666;
390
+ margin-top: 40px;
391
+ padding: 20px;
392
+ background: #f1f1f1;
393
+ border-radius: 5px;
394
+ }
395
+ pre {
396
+ background: #f4f4f4;
397
+ padding: 15px;
398
+ border-radius: 5px;
399
+ overflow-x: auto;
400
+ white-space: pre-wrap;
401
+ }
402
+ </style>
403
+ </head>
404
+ <body>
405
+ <div class="header">
406
+ <h1>πŸš€ Smart Data Analysis Report</h1>
407
+ <p>Comprehensive AI-Powered Data Insights</p>
408
+ </div>
409
+
410
+ <div class="metadata">
411
+ <strong>πŸ“ File:</strong> {{ file_name }}<br>
412
+ <strong>πŸ“… Generated:</strong> {{ timestamp }}<br>
413
+ <strong>πŸ€– Model:</strong> OpenAI gpt-oss-20b via Chutes AI
414
+ </div>
415
+
416
+ <div class="section">
417
+ <h2>🎯 AI Analysis & Insights</h2>
418
+ <div>{{ ai_analysis }}</div>
419
+ </div>
420
+
421
+ <div class="section">
422
+ <h2>πŸ“Š Visualizations</h2>
423
+ <div class="chart-container">
424
+ {{ charts_html }}
425
+ </div>
426
+ </div>
427
+
428
+ <div class="section">
429
+ <h2>πŸ“‹ Technical Data Summary</h2>
430
+ <pre>{{ data_summary }}</pre>
431
+ </div>
432
+
433
+ <div class="footer">
434
+ <p>Report generated by Smart Data Analyzer Pro β€’ Powered by AI</p>
435
+ <p>For questions or support, visit chutes.ai</p>
436
+ </div>
437
+ </body>
438
+ </html>
439
+ """
440
+
441
+ template = Template(html_template)
442
+
443
+ # Convert markdown to HTML for AI analysis
444
+ ai_analysis_html = analysis_text.replace('\n', '<br>')
445
+ ai_analysis_html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', ai_analysis_html)
446
+ ai_analysis_html = re.sub(r'## (.*?)\n', r'<h3>\1</h3>', ai_analysis_html)
447
+ ai_analysis_html = re.sub(r'# (.*?)\n', r'<h2>\1</h2>', ai_analysis_html)
448
+
449
+ charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"
450
+
451
+ return template.render(
452
+ file_name=file_name,
453
+ timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
454
+ ai_analysis=ai_analysis_html,
455
+ charts_html=charts_content,
456
+ data_summary=data_summary
457
+ )
458
 
459
  # Initialize the analyzer
460
  analyzer = EnhancedDataAnalyzer()
 
462
  async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
463
  """Enhanced analysis function with progress tracking"""
464
  if not file:
465
+ return "❌ Please upload a CSV or Excel file.", "", "", "", None
466
+
467
  if not analyzer.validate_api_key(api_key):
468
+ return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None
469
+
470
  # Validate file
471
  is_valid, validation_msg = analyzer.validate_file(file)
472
  if not is_valid:
473
+ return f"❌ {validation_msg}", "", "", "", None
474
+
475
  progress(0.1, desc="πŸ“ Reading file...")
476
+
477
  try:
478
  # Process the uploaded file
479
+ df, data_summary, charts_html = analyzer.process_file(file.name)
480
  progress(0.3, desc="πŸ“Š Processing data...")
481
+
482
  progress(0.5, desc="πŸ€– Generating AI insights...")
483
+
484
  # Get AI analysis
485
  ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
486
  progress(0.9, desc="✨ Finalizing results...")
487
+
488
  # Format the complete response
489
  response = f"""# 🎯 Analysis Complete!
490
 
 
494
  *Analysis powered by OpenAI gpt-oss-20b via Chutes β€’ Generated at {datetime.now().strftime('%H:%M:%S')}*
495
  """
496
 
497
+ # Generate data preview
498
+ data_preview_html = df.head(15).to_html(
499
+ classes="table table-striped table-hover",
500
+ table_id="data-preview-table",
501
+ escape=False
502
+ )
503
+
504
+ # Add some styling to the preview
505
+ styled_preview = f"""
506
+ <style>
507
+ #data-preview-table {{
508
+ width: 100%;
509
+ border-collapse: collapse;
510
+ margin: 20px 0;
511
+ font-size: 14px;
512
+ }}
513
+ #data-preview-table th {{
514
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
515
+ color: white;
516
+ padding: 12px 8px;
517
+ text-align: left;
518
+ font-weight: bold;
519
+ }}
520
+ #data-preview-table td {{
521
+ padding: 10px 8px;
522
+ border-bottom: 1px solid #ddd;
523
+ }}
524
+ #data-preview-table tr:hover {{
525
+ background-color: #f5f5f5;
526
+ }}
527
+ </style>
528
+ {data_preview_html}
529
+ """
530
+
531
  progress(1.0, desc="βœ… Done!")
532
+ return response, data_summary, styled_preview, charts_html, file.name
533
 
534
  except Exception as e:
535
+ logger.error(f"Analysis error: {str(e)}")
536
+ return f"❌ **Error**: {str(e)}", "", "", "", None
537
 
538
  def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
539
  """Synchronous wrapper for the async analyze function"""
 
541
 
542
  def clear_all():
543
  """Clear all inputs and outputs"""
544
+ analyzer.current_df = None
545
+ analyzer.current_charts = None
546
+ return None, "", "", "", "", "", "", None
547
 
548
+ def download_report(analysis_text, data_summary, file_name, format_choice):
549
+ """Generate downloadable report in PDF or HTML format"""
550
  if not analysis_text:
551
+ return None, "❌ No analysis data available for download."
552
+
553
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
554
+ file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis"
555
 
556
+ try:
557
+ if format_choice == "HTML":
558
+ # Generate HTML report
559
+ html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
560
+ filename = f"{file_base_name}_analysis_report_{timestamp}.html"
561
+
562
+ with open(filename, 'w', encoding='utf-8') as f:
563
+ f.write(html_content)
564
+
565
+ return filename, f"βœ… HTML report generated successfully! File: {filename}"
566
+
567
+ elif format_choice == "PDF":
568
+ # Generate PDF report
569
+ html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
570
+ filename = f"{file_base_name}_analysis_report_{timestamp}.pdf"
571
+
572
+ # Convert HTML to PDF using weasyprint
573
+ weasyprint.HTML(string=html_content).write_pdf(filename)
574
+
575
+ return filename, f"βœ… PDF report generated successfully! File: {filename}"
576
+
577
+ else: # Markdown fallback
578
+ report = f"""# Data Analysis Report
579
  Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
580
+ File: {file_name}
581
 
582
  ## AI Analysis:
583
  {analysis_text}
 
585
  ## Raw Data Summary:
586
  {data_summary}
587
  """
588
+ filename = f"{file_base_name}_analysis_report_{timestamp}.md"
589
  with open(filename, 'w', encoding='utf-8') as f:
590
+ f.write(report)
591
+
592
+ return filename, f"βœ… Markdown report generated successfully! File: {filename}"
593
+
594
  except Exception as e:
595
+ logger.error(f"Report generation error: {str(e)}")
596
+ return None, f"❌ Error generating report: {str(e)}"
597
 
598
  # Create enhanced Gradio interface
599
  with gr.Blocks(
 
613
  text-align: center;
614
  background: #f8f9ff;
615
  }
616
+ .charts-container {
617
+ max-height: 800px;
618
+ overflow-y: auto;
619
+ padding: 10px;
620
+ background: #fafafa;
621
+ border-radius: 8px;
622
+ }
623
  """
624
  ) as app:
625
 
626
+ # Store file name for downloads
627
+ current_file_name = gr.State("")
628
+
629
  # Header
630
  gr.Markdown("""
631
  # πŸš€ Smart Data Analyzer Pro
632
  ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
633
 
634
+ Upload your data files and get instant professional insights, visualizations, and downloadable reports!
635
  """)
636
 
637
  # Main interface
 
698
  with gr.Tab("πŸ“ˆ Visualizations"):
699
  charts_output = gr.HTML(
700
  label="Auto-Generated Charts",
701
+ value="<div class='charts-container'><p>πŸ“Š Interactive charts will appear here after analysis...</p></div>",
702
+ elem_classes=["charts-container"]
703
  )
704
 
705
  with gr.Tab("πŸ” Raw Summary"):
 
710
  show_copy_button=True
711
  )
712
 
713
+ with gr.Tab("πŸ’Ύ Export Reports"):
714
+ gr.Markdown("### πŸ“₯ Download Your Analysis Report")
715
+
716
+ with gr.Row():
717
+ format_choice = gr.Radio(
718
+ choices=["HTML", "PDF", "Markdown"],
719
+ value="HTML",
720
+ label="πŸ“„ Report Format",
721
+ info="Choose your preferred download format"
722
+ )
723
+
724
+ download_btn = gr.Button("πŸ“₯ Generate & Download Report", variant="primary", size="lg")
725
+ download_status = gr.Textbox(label="Download Status", interactive=False)
726
+ download_file = gr.File(label="πŸ“„ Download Link", visible=True)
727
 
728
  # Event handlers
729
  def update_file_stats(file):
 
737
  except:
738
  return "File information unavailable"
739
 
740
+ def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
741
+ """Handle main analysis and return all outputs including file name"""
742
+ result = sync_analyze_data(file, api_key, user_question, progress)
743
+ if len(result) == 5: # Check if file name was returned
744
+ return result[0], result[1], result[2], result[3], result[4] # analysis, summary, preview, charts, filename
745
+ else:
746
+ return result[0], result[1], result[2], result[3], "" # fallback without filename
747
+
748
+ def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
749
+ """Handle question-specific analysis"""
750
+ if not question.strip():
751
+ return "❓ Please enter a specific question about your data."
752
+
753
+ result = sync_analyze_data(file, api_key, question, progress)
754
+ return result[0] # Return only the analysis output
755
+
756
+ # Main analysis event
757
  analyze_btn.click(
758
+ fn=handle_analysis,
759
  inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
760
+ outputs=[analysis_output, raw_summary, data_preview, charts_output, current_file_name],
761
  show_progress=True
762
  )
763
 
764
  # Follow-up questions
765
  ask_btn.click(
766
+ fn=handle_question_analysis,
767
  inputs=[file_input, api_key_input, question_input],
768
+ outputs=[question_output],
769
  show_progress=True
770
  )
771
 
 
780
  clear_btn.click(
781
  fn=clear_all,
782
  outputs=[file_input, api_key_input, question_input, analysis_output,
783
+ question_output, data_preview, charts_output, raw_summary]
784
  )
785
 
786
+ # Enhanced download functionality
787
  download_btn.click(
788
+ fn=download_report,
789
+ inputs=[analysis_output, raw_summary, current_file_name, format_choice],
790
+ outputs=[download_file, download_status]
791
  )
792
 
793
  # Footer with usage tips
 
800
  - Use descriptive column names
801
  - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
802
 
803
+ **πŸ“Š Visualizations Include:**
804
+ - Missing data analysis
805
+ - Correlation matrices for numerical data
806
+ - Distribution plots and histograms
807
+ - Top categories for categorical data
808
+ - Dataset overview metrics
809
+
810
+ **πŸ“₯ Export Options:**
811
+ - **HTML**: Interactive report with embedded charts
812
+ - **PDF**: Professional report for presentations
813
+ - **Markdown**: Simple text format for documentation
814
+
815
  **⚑ Speed Optimization:**
816
  - Files under 10MB process fastest
817
  - CSV files typically load faster than Excel
 
820
  **πŸ”§ Supported Formats:** CSV, XLSX, XLS | **πŸ“ Max Size:** 50MB | **πŸš€ Response Time:** ~3-5 seconds
821
  """)
822
 
823
+ def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
824
+ """Synchronous wrapper for the async analyze function"""
825
+ return asyncio.run(analyze_data(file, api_key, user_question, progress))
826
+
827
  # Launch configuration
828
  if __name__ == "__main__":
829
  app.queue(max_size=10) # Handle multiple users
830
  app.launch(
831
+ share=True
832
  )
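
Quick way to exercise the export pipeline this commit introduces (generate_visualizations β†’ generate_report_html β†’ weasyprint PDF) outside the Gradio UI. This is a minimal sketch, not part of the commit: it assumes the app.py above is importable from the working directory (importing it also builds the Gradio Blocks interface at import time), that pandas, plotly, jinja2 and weasyprint are installed, and that the sample DataFrame and placeholder analysis text are made-up illustrations rather than real model output.

import pandas as pd
import weasyprint

from app import EnhancedDataAnalyzer  # class defined in the diff above

analyzer = EnhancedDataAnalyzer()

# Made-up sample data standing in for an uploaded CSV
df = pd.DataFrame({
    "region": ["north", "south", "north", "east"],
    "revenue": [120.5, 98.0, 143.2, 77.9],
})

# Same call order the new process_file() / download_report() path uses
data_summary = analyzer.generate_enhanced_summary(df)
charts_html = analyzer.generate_visualizations(df)  # also populates analyzer.current_charts for the report
report_html = analyzer.generate_report_html(
    analysis_text="**Placeholder analysis text** (stands in for the AI output)",
    data_summary=data_summary,
    file_name="sample.csv",
)

# HTML branch of download_report(): write the rendered template to disk
with open("sample_report.html", "w", encoding="utf-8") as f:
    f.write(report_html)

# PDF branch of download_report(): weasyprint renders the same HTML string
weasyprint.HTML(string=report_html).write_pdf("sample_report.pdf")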